* [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
@ 2009-05-29 14:16 Rusty Russell
  0 siblings, 4 replies; 44+ messages in thread
From: Rusty Russell @ 2009-05-29 14:16 UTC (permalink / raw)
To: netdev; +Cc: virtualization, Herbert Xu

This effectively reverts 99ffc696d10b28580fe93441d627cf290ac4484c
"virtio: wean net driver off NETDEV_TX_BUSY".

The complexity of queuing an skb (setting a tasklet to re-xmit) is
questionable, especially once we get rid of the other reason for the
tasklet in the next patch.

If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY.  It
might be frowned upon, but it's common and not going away any time
soon.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/net/virtio_net.c |   49 ++++++++++-------------------------------------
 1 file changed, 11 insertions(+), 38 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -47,9 +47,6 @@ struct virtnet_info
 	struct napi_struct napi;
 	unsigned int status;

-	/* The skb we couldn't send because buffers were full. */
-	struct sk_buff *last_xmit_skb;
-
 	/* If we need to free in a timer, this is it. */
 	struct timer_list xmit_free_timer;

@@ -116,9 +113,8 @@ static void skb_xmit_done(struct virtque
 	/* We were probably waiting for more output buffers. */
 	netif_wake_queue(vi->dev);

-	/* Make sure we re-xmit last_xmit_skb: if there are no more packets
-	 * queued, start_xmit won't be called. */
-	tasklet_schedule(&vi->tasklet);
+	if (vi->free_in_tasklet)
+		tasklet_schedule(&vi->tasklet);
 }

 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
@@ -509,12 +505,7 @@ static void xmit_tasklet(unsigned long d
 	struct virtnet_info *vi = (void *)data;

 	netif_tx_lock_bh(vi->dev);
-	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
-		vi->svq->vq_ops->kick(vi->svq);
-		vi->last_xmit_skb = NULL;
-	}
-	if (vi->free_in_tasklet)
-		free_old_xmit_skbs(vi);
+	free_old_xmit_skbs(vi);
 	netif_tx_unlock_bh(vi->dev);
 }

@@ -526,27 +517,14 @@ again:
 	/* Free up any pending old buffers before queueing new ones. */
 	free_old_xmit_skbs(vi);

-	/* If we has a buffer left over from last time, send it now. */
-	if (unlikely(vi->last_xmit_skb) &&
-	    xmit_skb(vi, vi->last_xmit_skb) != 0)
-		goto stop_queue;
+	/* Put new one in send queue and do transmit */
+	__skb_queue_head(&vi->send, skb);
+	if (likely(xmit_skb(vi, skb) == 0)) {
+		vi->svq->vq_ops->kick(vi->svq);
+		return NETDEV_TX_OK;
+	}

-	vi->last_xmit_skb = NULL;
-
-	/* Put new one in send queue and do transmit */
-	if (likely(skb)) {
-		__skb_queue_head(&vi->send, skb);
-		if (xmit_skb(vi, skb) != 0) {
-			vi->last_xmit_skb = skb;
-			skb = NULL;
-			goto stop_queue;
-		}
-	}
-done:
-	vi->svq->vq_ops->kick(vi->svq);
-	return NETDEV_TX_OK;
-
-stop_queue:
+	/* Ring too full for this packet. */
 	pr_debug("%s: virtio not prepared to send\n", dev->name);
 	netif_stop_queue(dev);

@@ -557,12 +535,7 @@ stop_queue:
 		netif_start_queue(dev);
 		goto again;
 	}
-	if (skb) {
-		/* Drop this skb: we only queue one. */
-		vi->dev->stats.tx_dropped++;
-		kfree_skb(skb);
-	}
-	goto done;
+	return NETDEV_TX_BUSY;
 }

 static int virtnet_set_mac_address(struct net_device *dev, void *p)

^ permalink raw reply	[flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Mark McLoughlin @ 2009-06-02 8:07 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, virtualization, Herbert Xu

On Fri, 2009-05-29 at 23:46 +0930, Rusty Russell wrote:
> This effectively reverts 99ffc696d10b28580fe93441d627cf290ac4484c
> "virtio: wean net driver off NETDEV_TX_BUSY".
>
> The complexity of queuing an skb (setting a tasklet to re-xmit) is
> questionable,

It certainly adds some subtle complexities to start_xmit()

> especially once we get rid of the other reason for the
> tasklet in the next patch.
>
> If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY.  It
> might be frowned upon, but it's common and not going away any time
> soon.
>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> ---
>  drivers/net/virtio_net.c |   49 ++++++++++-------------------------------------
>  1 file changed, 11 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
>
> @@ -526,27 +517,14 @@ again:
>  	/* Free up any pending old buffers before queueing new ones. */
>  	free_old_xmit_skbs(vi);
>
> -	/* If we has a buffer left over from last time, send it now. */
> -	if (unlikely(vi->last_xmit_skb) &&
> -	    xmit_skb(vi, vi->last_xmit_skb) != 0)
> -		goto stop_queue;
> +	/* Put new one in send queue and do transmit */
> +	__skb_queue_head(&vi->send, skb);
> +	if (likely(xmit_skb(vi, skb) == 0)) {
> +		vi->svq->vq_ops->kick(vi->svq);
> +		return NETDEV_TX_OK;
> +	}

Hmm, is it okay to leave the skb on the send queue if we return
NETDEV_TX_BUSY?

Cheers,
Mark.
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Rusty Russell @ 2009-06-02 14:04 UTC (permalink / raw)
To: Mark McLoughlin; +Cc: netdev, Herbert Xu, virtualization

On Tue, 2 Jun 2009 05:37:30 pm Mark McLoughlin wrote:
> On Fri, 2009-05-29 at 23:46 +0930, Rusty Russell wrote:
> > @@ -526,27 +517,14 @@ again:
> >  	/* Free up any pending old buffers before queueing new ones. */
> >  	free_old_xmit_skbs(vi);
> >
> > -	/* If we has a buffer left over from last time, send it now. */
> > -	if (unlikely(vi->last_xmit_skb) &&
> > -	    xmit_skb(vi, vi->last_xmit_skb) != 0)
> > -		goto stop_queue;
> > +	/* Put new one in send queue and do transmit */
> > +	__skb_queue_head(&vi->send, skb);
> > +	if (likely(xmit_skb(vi, skb) == 0)) {
> > +		vi->svq->vq_ops->kick(vi->svq);
> > +		return NETDEV_TX_OK;
> > +	}
>
> Hmm, is it okay to leave the skb on the send queue if we return
> NETDEV_TX_BUSY?

Certainly not.  That's a bug.  Incremental fix is:

diff -u b/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- b/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -524,8 +524,9 @@
 		return NETDEV_TX_OK;
 	}

-	/* Ring too full for this packet. */
+	/* Ring too full for this packet, remove it from queue again. */
 	pr_debug("%s: virtio not prepared to send\n", dev->name);
+	__skb_unlink(skb, &vi->send);
 	netif_stop_queue(dev);

 	/* Activate callback for using skbs: if this returns false it

Thanks!
Rusty.
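The invariant behind that incremental fix — an skb sits on the driver's send list only while the ring actually holds it, so a failed add must unlink it again before returning NETDEV_TX_BUSY — can be modelled with a hypothetical user-space sketch (plain counters stand in for the skb list and the virtqueue; none of these names are the real kernel API):

```c
#include <assert.h>

/* Toy model of the corrected start_xmit flow: queue first, try the ring,
 * and unlink again on failure so list and ring never disagree. */

enum { TX_OK = 0, TX_BUSY = 1 };

struct ring  { int free; };   /* free descriptors in the virtqueue      */
struct queue { int len;  };   /* stand-in for the vi->send skb list     */

static int ring_add(struct ring *r)    /* stand-in for xmit_skb()       */
{
	if (r->free == 0)
		return -1;             /* ring full                     */
	r->free--;
	return 0;
}

static int start_xmit(struct ring *r, struct queue *send)
{
	send->len++;                   /* __skb_queue_head(&vi->send)   */
	if (ring_add(r) == 0)
		return TX_OK;          /* kick + NETDEV_TX_OK           */
	send->len--;                   /* __skb_unlink(skb, &vi->send)  */
	return TX_BUSY;                /* NETDEV_TX_BUSY                */
}
```

Without the unlink, a packet rejected with TX_BUSY would be freed later by free_old_xmit_skbs() even though the core requeues and retransmits it — exactly the bug Mark spotted.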
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Herbert Xu @ 2009-06-02 9:05 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, virtualization

On Fri, May 29, 2009 at 11:46:04PM +0930, Rusty Russell wrote:
>
> This effectively reverts 99ffc696d10b28580fe93441d627cf290ac4484c
> "virtio: wean net driver off NETDEV_TX_BUSY".
>
> The complexity of queuing an skb (setting a tasklet to re-xmit) is
> questionable, especially once we get rid of the other reason for the
> tasklet in the next patch.
>
> If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY.  It
> might be frowned upon, but it's common and not going away any time
> soon.

This looks like a step backwards to me.  If we have to do this
to fix a bug, sure.  But just doing it for the sake of it smells
wrong.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Rusty Russell @ 2009-06-02 13:55 UTC (permalink / raw)
To: Herbert Xu; +Cc: netdev, virtualization

On Tue, 2 Jun 2009 06:35:52 pm Herbert Xu wrote:
> On Fri, May 29, 2009 at 11:46:04PM +0930, Rusty Russell wrote:
> > If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY.  It
> > might be frowned upon, but it's common and not going away any time
> > soon.
>
> This looks like a step backwards to me.  If we have to do this
> to fix a bug, sure.  But just doing it for the sake of it smells
> wrong.

I disagree.  We've introduced a third queue, inside the driver, one
element long.  That feels terribly, terribly hacky and wrong.

What do we do if it overflows?  Discard the packet, even if we have
room in the tx queue.

And when do we flush this queue?  Well, that's a bit messy.  Obviously
we need to enable the tx interrupt when the tx queue is full, but we
can't just netif_wake_queue, we have to also flush the queue.  We can't
do that in an irq handler, since we need to block the normal tx path
(or introduce another lock and disable interrupts in our xmit routine).
So we add a tasklet to do this re-transmission.

Or, we could just "return NETDEV_TX_BUSY;".  I like that :)

Hope that clarifies,
Rusty.
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Herbert Xu @ 2009-06-02 23:45 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, virtualization

On Tue, Jun 02, 2009 at 11:25:57PM +0930, Rusty Russell wrote:
>
> Or, we could just "return NETDEV_TX_BUSY;".  I like that :)

No you should fix it so that you check the queue status after
transmitting a packet so we never get into this state in the
first place.  NETDEV_TX_BUSY is just passing the problem to someone
else, which is not nice at all.  For example, anyone running tcpdump
will now see the packet twice.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Rusty Russell @ 2009-06-03 3:17 UTC (permalink / raw)
To: Herbert Xu; +Cc: netdev, virtualization, David Miller

On Wed, 3 Jun 2009 09:15:32 am Herbert Xu wrote:
> On Tue, Jun 02, 2009 at 11:25:57PM +0930, Rusty Russell wrote:
> > Or, we could just "return NETDEV_TX_BUSY;".  I like that :)
>
> No you should fix it so that you check the queue status after
> transmitting a packet so we never get into this state in the
> first place.

We could figure out if we can take the worst-case packet, and
underutilize our queue.  And fix the other *67* drivers.

Of course that doesn't even work, because we return NETDEV_TX_BUSY from
dev.c!  "Hi, core netdevs here.  Don't use NETDEV_TX_BUSY.  Yeah, we
can't figure out how to avoid it either.  But y'know, just hack
something together".

Herbert, we are *better* than this!  How's this?  Tested for the
virtio_net driver here.

[RFC] net: fix double-tcpdump problem with NETDEV_TX_BUSY.

Herbert shares a disdain for drivers returning TX_BUSY because network
taps see packets twice when it's used.  Unfortunately, it's ubiquitous.

This patch marks packets by (ab)using the "peeked" bit in the skb.  This
bit is currently used for packets queued in a socket; we reset it in
dev_queue_xmit and set it when we hand the packet to dev_queue_xmit_nit.

We also reset it on incoming packets: this is safe, but it might be
sufficient to reset it only in the loopback driver?

diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1678,8 +1678,10 @@ int dev_hard_start_xmit(struct sk_buff *
 	int rc;

 	if (likely(!skb->next)) {
-		if (!list_empty(&ptype_all))
+		if (!list_empty(&ptype_all) && !skb->peeked) {
 			dev_queue_xmit_nit(skb, dev);
+			skb->peeked = true;
+		}

 		if (netif_needs_gso(dev, skb)) {
 			if (unlikely(dev_gso_segment(skb)))
@@ -1796,6 +1798,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;

+	skb->peeked = false;
+
 	/* GSO will handle the following emulations directly. */
 	if (netif_needs_gso(dev, skb))
 		goto gso;
@@ -1942,6 +1946,8 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);

+	skb->peeked = false;
+
 	/*
 	 * The code is rearranged so that the path is the most
 	 * short when CPU is congested, but is still operating.

^ permalink raw reply	[flat|nested] 44+ messages in thread
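The mechanism of that RFC — mark a packet the first time it is shown to the taps, so a TX_BUSY requeue-and-retry does not deliver it to tcpdump twice — reduces to a small flag protocol. A hypothetical user-space model (the names mirror the kernel functions but are toy stand-ins):

```c
#include <assert.h>
#include <stdbool.h>

/* Toy model of the "peeked" bit: only the first transmit attempt of a
 * packet is handed to the taps; a retry after TX_BUSY is skipped. */

struct pkt { bool peeked; };

static int taps_seen;                     /* packets delivered to taps */

static void dev_queue_xmit_nit(struct pkt *p)
{
	(void)p;
	taps_seen++;                      /* tcpdump sees the packet   */
}

static void hard_start_xmit(struct pkt *p)
{
	if (!p->peeked) {                 /* first attempt only        */
		dev_queue_xmit_nit(p);
		p->peeked = true;
	}
	/* ...hand the packet to the driver here... */
}
```

The bit must be cleared in dev_queue_xmit (and for incoming packets) because it is otherwise reused for socket-queue peeking; that reset is what makes the flag per-transmit rather than per-skb-lifetime.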
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Herbert Xu @ 2009-06-08 5:22 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, virtualization, David Miller

On Wed, Jun 03, 2009 at 12:47:04PM +0930, Rusty Russell wrote:
>
> We could figure out if we can take the worst-case packet, and
> underutilize our queue.  And fix the other *67* drivers.

Most of those are for debugging purposes, i.e., they'll never happen
unless the driver is buggy.

> Of course that doesn't even work, because we return NETDEV_TX_BUSY
> from dev.c!

If and when your driver becomes part of the core and it has to feed
into other drivers, then you can use this argument :)

> "Hi, core netdevs here.  Don't use NETDEV_TX_BUSY.  Yeah, we can't
> figure out how to avoid it either.  But y'know, just hack something
> together".

No you've misunderstood my complaint.  I'm not trying to get you
to replace NETDEV_TX_BUSY by the equally abhorrent queue in the
driver, I'm saying that you should stop the queue before you get
a packet that overflows by looking at the amount of free queue
space after transmitting each packet.

For most drivers this is easy to do.  What's so different about
virtio-net that makes this impossible?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Rusty Russell @ 2009-06-13 12:30 UTC (permalink / raw)
To: Herbert Xu; +Cc: netdev, David Miller, virtualization

On Mon, 8 Jun 2009 02:52:47 pm Herbert Xu wrote:
> No you've misunderstood my complaint.  I'm not trying to get you
> to replace NETDEV_TX_BUSY by the equally abhorrent queue in the
> driver, I'm saying that you should stop the queue before you get
> a packet that overflows by looking at the amount of free queue
> space after transmitting each packet.
>
> For most drivers this is easy to do.  What's so different about
> virtio-net that makes this impossible?

If we assume the worst case; ie. that the next packet will use max
frags, we get close (make add_buf take a "unsigned int *descs_left"
arg).  Obviously, this is suboptimal use of the ring.  We can still get
kmalloc failures w/ indirect descriptors, but dropping a packet then
isn't a huge deal IMHO.

But re your comment that the 67 drivers using TX_BUSY are doing it
because of driver bugs, that's hard to believe.  It either hardly ever
happens (in which case just drop the packet), or it happens (in which
case we should handle it correctly).

TX_BUSY makes me queasy: you haven't convinced me it shouldn't be
killed or fixed.  Did you look at my attempted fix?

Rusty.
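The "assume the worst case" approach Rusty describes — stop the queue after any transmit that leaves fewer free descriptors than a maximally fragmented packet could need, so start_xmit is never entered without room — can be sketched in user space. This is a hypothetical model, not the driver code; `MAX_FRAGS` and the struct names are stand-ins:

```c
#include <assert.h>
#include <stdbool.h>

/* Stop-early policy: after each transmit, if the ring could not take a
 * worst-case (maximally fragmented) packet, stop the queue now rather
 * than fail the next start_xmit. */

#define MAX_FRAGS 18                  /* assumed worst-case descriptors */

struct vq { int free; };              /* free descriptors in the ring   */

static bool queue_stopped;            /* netif_stop_queue() state       */

static int xmit_one(struct vq *vq, int descs)
{
	if (descs > vq->free)
		return -1;            /* can no longer happen           */
	vq->free -= descs;

	/* Stop before the *next* packet might overflow. */
	if (vq->free < MAX_FRAGS)
		queue_stopped = true;
	return 0;
}
```

The cost Rusty points out is visible here: a ring with, say, 17 descriptors free is declared full even though it could hold many small packets — the queue is underutilized in exchange for never returning NETDEV_TX_BUSY.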
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Herbert Xu @ 2009-06-14 6:45 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, virtualization, David Miller

On Sat, Jun 13, 2009 at 10:00:37PM +0930, Rusty Russell wrote:
>
> But re your comment that the 67 drivers using TX_BUSY are doing it
> because of driver bugs, that's hard to believe.  It either hardly ever
> happens (in which case just drop the packet), or it happens (in which
> case we should handle it correctly).

Most of them just do this:

start_xmit:

	if (unlikely(queue is full)) {
		/* This should never happen. */
		return TX_BUSY;
	}

	transmit

	if (queue is full)
		stop queue

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
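Herbert's canonical pattern can be made concrete with a hypothetical user-space sketch (assuming, unlike virtio-net, that every packet consumes exactly one queue slot — that assumption is what makes the top check a pure bug check):

```c
#include <assert.h>
#include <stdbool.h>

/* Toy model of the common driver pattern: the full-queue test at the
 * top "should never happen", because the stop at the bottom prevents
 * the core from calling start_xmit when the queue can't take one more
 * packet. */

enum { TX_OK = 0, TX_BUSY = 1 };

struct hw { int free; };              /* free slots in the device ring */

static bool stopped;                  /* netif_{stop,wake}_queue state */

static int start_xmit(struct hw *hw)
{
	if (hw->free == 0)            /* "This should never happen."   */
		return TX_BUSY;

	hw->free--;                   /* transmit the packet           */

	if (hw->free == 0)            /* stop before the next overflow */
		stopped = true;
	return TX_OK;
}
```

With one slot per packet the bottom check is exact; with variable-size packets (virtio's scatter-gather descriptors) it has to be replaced by a pessimistic worst-case estimate, which is precisely the trade-off the thread is arguing about.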
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb.
From: Rusty Russell @ 2009-06-18 7:17 UTC (permalink / raw)
To: Herbert Xu; +Cc: netdev, David Miller, virtualization

On Sun, 14 Jun 2009 04:15:28 pm Herbert Xu wrote:
> On Sat, Jun 13, 2009 at 10:00:37PM +0930, Rusty Russell wrote:
> > But re your comment that the 67 drivers using TX_BUSY are doing it
> > because of driver bugs, that's hard to believe.  It either hardly
> > ever happens (in which case just drop the packet), or it happens
> > (in which case we should handle it correctly).
>
> Most of them just do this:
>
> start_xmit:
>
> 	if (unlikely(queue is full)) {
> 		/* This should never happen. */
> 		return TX_BUSY;
> 	}

OK, so I did a rough audit, trying to figure out the "never happens"
ones (N, could be kfree_skb(skb); return NETDEV_TX_OK) from the "will
actually happen" ones (Y).

One question: can netif_queue_stopped() ever be true inside start_xmit?
Some drivers test that, like sun3lance.c.

Some have multiple returns, and some are unclear, but my best guess on
a quickish reading is below.  Summary: we still have about 54 in-tree
drivers which actually use NETDEV_TX_BUSY for normal paths.

Can I fix it now?

Thanks,
Rusty.

sungem.c: Y, N
fs_enet: N
mace.c: N
sh_eth.c: Y
de620.c: N
acenic.c: N (but goes through stupid hoops to avoid it)
e1000_main.c: N, Y
korina.c: N (Buggy: frees skb and returns TX_BUSY.)
sky2.c: N
cassini.c: N
ixgbe: N
b44.c: N, Y, Y (last two buggy: OOM, does not stop q)
macb.c: N
niu.c: N
smctr.c: N
olympic.c: Y
tms380tr.c: N
3c359.c: Y
lanstreamer.c: Y
lib8390.c: N
xirc2ps_cs.c: Y
smc91c92_cs.c: N
fmvj18x_cs.c: N (and buggy: can't happen AFAICT, and return 0 above?)
axnet_cs.c: N
smsc9420.c: N (and buggy: doesn't stop q)
mkiss.c: N (In xmit, can netif_running be false?  Can netif_queue_stopped?)
skge.c: N
qlge: N, N, Y, N (buggy, OOM, does not stop q)
chelsio: N
s2io.c: Y, Y?
macmace.c: N
3c505.c: Y
defxx.c: Y
myri10ge: N
sbni.c: Y
wanxl.c: N
cycx_x25.c: N, N, Y?
dlci.c: Y
qla3xxx.c: N, N (buggy, OOM, does not stop q), Y, N,
tlan.c: Y
skfp.c: N
cs89x0.c: N
smc9194.c: N
fec_mpc52xx.c: N
mv643xx_eth.c: N (buggy, OOM, does not stop q)
ll_temac_main.c: Y, Y
netxen: Y
tsi108_eth.c: N, N
ni65.c: N
sunhme.c: N
atl1c.c: Y
ps3_gelic_net.c: Y
igbvf: N
csgb3.c: N
ks8695net.c: N, N (buggy, neither stops q, latter OOM)
ether3.c: N
at91_ether.c: N
bnx2x_main.c: N, N
dm9000.c: N
jme.c: N
3c537.c: Y (plus, leak on skb_padto fail)
arcnet.c: N?
3c59x.c: N
au1000_eth.c: Y
ixgb: N
de600.c: N, N, N
myri_sbus.c: Y
bnx2.c: N
atl1e: Y
sonic.c: who cares, that won't even compile... (missing semicolon)
sun3_82586.c: N
3c515.c: N
ibm_newemac.c: Y
donaubae.c: Y?, Y?, Y?, Y (but never stops q)
sir_dev.c: Y
au1k_ir.c: Y, Y
cpmac.c: N (no stop q, and leak on skb_padto fail), Y
davinci_emac.c: N (no stop q), Y
de2104x.c: N
uli526x.c: N
dmfe.c: N
xircom_cb.c: N
iwmc3200wifi: Y
orinoco: N, N, N, N (no stop q)
atmel.c: Y
p54common.c: N, Y?
arlan-main.c: Y?
libipw_tx.c: Y (no stop q), N (alloc failure)
hostap_80211_tx.c: Y
strip.c: N
wavelan.c: N, N, N
at76c50x-usb.c: N
libertas/tx.c: Y
ray_cs.c: N, N
airo.c: Y, Y, Y
plip.c: N, N, N (starts q, BUSY on too big pkt?)
ns83820.c: N, N
ehea: Y, Y (no stop q)
rionet.c: N
enic: N
sis900.c: N
starfire.c: Y
r6040.c: N
sun3lance.c: N, N
sfc: Y, N, Y
mac89x0.c: N
sb1250-mac.c: Y
pasemi_mac.c: Y
8139cp.c: N
e1000e: N
r8169.c: N?
sis190.c: N
e100.c: N
tg3.c: N, Y?, N
fec.c: N (no stop q), N
hamachi.c: N
forcedeth.c: Y, Y
vxge: Y?, Y?
ks8842.c: Y
spider_net.c: Y
igb: N
ewrk3.c: N
gianfar.c: Y
sunvnet.c: N
mlx4: Y
atlx: Y, Y
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-14 6:45 ` Herbert Xu 2009-06-18 7:17 ` Rusty Russell @ 2009-06-18 7:17 ` Rusty Russell 2009-06-18 7:34 ` Herbert Xu 2009-06-18 7:34 ` Herbert Xu 1 sibling, 2 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-18 7:17 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, virtualization, David Miller On Sun, 14 Jun 2009 04:15:28 pm Herbert Xu wrote: > On Sat, Jun 13, 2009 at 10:00:37PM +0930, Rusty Russell wrote: > > But re your comment that the 67 drivers using TX_BUSY are doing it > > because of driver bugs, that's hard to believe. It either hardly ever > > happens (in which case just drop the packet), or it happens (in which > > case we should handle it correctly). > > Most of them just do this: > > start_xmit: > > if (unlikely(queue is full)) { > /* This should never happen. */ > return TX_BUSY; > } OK, so I did a rough audit, trying to figure out the "never happens" ones (N, could be kfree_skb(skb); return NETDEV_TX_OK) from the "will actually happen" ones (Y). One question: can netif_queue_stopped() ever be true inside start_xmit? Some drivers test that, like sun3lance.c. Some have multiple returns, and some are unclear, but my best guess on a quickish reading is below. Summary: we still have about 54 in-tree drivers which actually use NETDEV_TX_BUSY for normal paths. Can I fix it now? Thanks, Rusty. sungem.c: Y, N fs_enet: N mace.c: N sh_eth.c: Y de620.c: N acenic.c: N (but goes through stupid hoops to avoid it) e1000_main.c: N, Y korina.c: N (Buggy: frees skb and returns TX_BUSY.) sky2.c: N cassini.c: N ixgbe: N b44.c: N, Y, Y (last two buggy: OOM, does not stop q) macb.c: N niu.c: N smctr.c: N olympic.c: Y tms380tr.c: N 3c359.c: Y lanstreamer.c: Y lib8390.c: N xirc2ps_cs.c: Y smc91c92_cs.c: N fmvj18x_cs.c: N (and buggy: can't happen AFAICT, and return 0 above?) 
axnet_cs.c: N smsc9420.c: N (and buggy: doesn't stop q) mkiss.c: N (In xmit, can netif_running be false? Can netif_queue_stopped?) skge.c: N qlge: N, N, Y, N (buggy, OOM, does not stop q) chelsio: N s2io.c: Y, Y? macmace.c: N 3c505.c: Y defxx.c: Y myri10ge: N sbni.c: Y wanxl.c: N cycx_x25.c: N, N, Y? dlci.c: Y qla3xxx.c: N, N (buggy, OOM, does not stop q), Y, N, tlan.c: Y skfp.c: N cs89x0.c: N smc9194.c: N fec_mpc52xx.c: N mv643xx_eth.c: N (buggy, OOM, does not stop q) ll_temac_main.c: Y, Y netxen: Y tsi108_eth.c: N, N ni65.c: N sunhme.c: N atl1c.c: Y ps3_gelic_net.c: Y igbvf: N csgb3.c: N ks8695net.c: N, N (buggy, neither stops q, latter OOM) ether3.c: N at91_ether.c: N bnx2x_main.c: N, N dm9000.c: N jme.c: N 3c537.c: Y (plus, leak on skb_padto fail) arcnet.c: N? 3c59x.c: N au1000_eth.c: Y ixgb: N de600.c: N, N, N myri_sbus.c: Y bnx2.c: N atl1e: Y sonic.c: who cares, that won't even compile... (missing semicolon) sun3_82586.c: N 3c515.c: N ibm_newemac.c: Y donaubae.c:Y?, Y?, Y?, Y (but never stops q) sir_dev.c: Y au1k_ir.c: Y, Y cpmac.c: N (no stop q, and leak on skb_padto fail), Y davinci_emac.c: N (no stop q), Y de2104x.c: N uli526x.c: N dmfe.c: N xircom_cb.c: N iwmc3200wifi: Y orinoco: N, N, N, N (no stop q) atmel.c: Y p54common.c: N, Y? arlan-main.c: Y? libipw_tx.c: Y (no stop q), N (alloc failure) hostap_80211_tx.c: Y strip.c: N wavelan.c: N, N, N at76c50x-usb.c: N libertas/tx.c: Y ray_cs.c: N, N airo.c: Y, Y, Y plip.c: N, N, N (starts q, BUSY on too big pkt?) ns83820.c: N, N ehea: Y, Y (no stop q) rionet.c: N enic: N sis900.c: N starfire.c: Y r6040.c: N sun3lance.c: N, N sfc: Y, N, Y mac89x0.c: N sb1250-mac.c: Y pasemi_mac.c: Y 8139cp.c: N e1000e: N r8169.c: N? sis190.c: N e100.c: N tg3.c: N, Y?, N fec.c: N (no stop q), N hamachi.c: N forcedeth.c: Y, Y vxge: Y?, Y? ks8842.c: Y spider_net.c: Y igb: N ewrk3.c: N gianfar.c: Y sunvnet.c: N mlx4: Y atlx: Y, Y ^ permalink raw reply [flat|nested] 44+ messages in thread
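The distinction Rusty's audit draws can be sketched in plain C. This is a toy model with made-up names, not code from any of the drivers listed: an "N" driver's full-ring check is a can't-happen bug check, so it could just drop the packet and return OK, while a "Y" driver genuinely reaches the condition and relies on the core retrying the same skb later.

```c
#include <assert.h>

/* Toy contrast of the two idioms in the audit.  Hypothetical names;
 * this is plain C, not kernel code. */

#define TOY_TX_OK   0
#define TOY_TX_BUSY 1

struct toy_dev {
    int ring_free;   /* free slots in the TX ring */
    int tx_dropped;  /* packets thrown away */
};

/* "N" style: a full ring is treated as a bug, so the packet is
 * dropped and the core is told everything is fine. */
static int xmit_drop_on_full(struct toy_dev *d)
{
    if (d->ring_free == 0) {
        d->tx_dropped++;   /* stands in for kfree_skb(skb); return NETDEV_TX_OK; */
        return TOY_TX_OK;
    }
    d->ring_free--;
    return TOY_TX_OK;
}

/* "Y" style: the packet is handed back so the core requeues and
 * retries it; nothing is lost, but requeueing has its own costs. */
static int xmit_busy_on_full(struct toy_dev *d)
{
    if (d->ring_free == 0)
        return TOY_TX_BUSY;   /* stands in for return NETDEV_TX_BUSY; */
    d->ring_free--;
    return TOY_TX_OK;
}
```

With a one-slot ring, the first style silently drops the second packet while the second style preserves it, which is exactly why the "Y" drivers cannot be converted mechanically.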
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-18 7:17 ` Rusty Russell 2009-06-18 7:34 ` Herbert Xu @ 2009-06-18 7:34 ` Herbert Xu 2009-06-19 3:37 ` Rusty Russell 2009-06-19 3:37 ` Rusty Russell 1 sibling, 2 replies; 44+ messages in thread From: Herbert Xu @ 2009-06-18 7:34 UTC (permalink / raw) To: Rusty Russell; +Cc: netdev, virtualization, David Miller On Thu, Jun 18, 2009 at 04:47:50PM +0930, Rusty Russell wrote: > > One question: can netif_queue_stopped() ever be true inside start_xmit? Some > drivers test that, like sun3lance.c. The driver should never test that because even if it is true due to driver shutdown the xmit function should ignore it as the upper layer will wait for it anyway. > Summary: we still have about 54 in-tree drivers which actually use > NETDEV_TX_BUSY for normal paths. Can I fix it now? You can fix it but I don't quite understand your results below :) > sungem.c: Y, N This driver does the bug check in addition to a race check that should simply drop the packet instead of queueing. In fact chances are the race check is unnecessary anyway. > fs_enet: N This is either just a bug check or the driver is broken in that it should stop the queue when the said condition can be true. > mace.c: N Just a bug check. > sh_eth.c: Y This driver should check the queue after transmitting, just like virtio-net :) So from a totally non-representative sample of 4, my conclusion is that none of them need TX_BUSY. Do you have an example that really needs it? Anyway, I don't think we should reshape our APIs based on how broken the existing users are. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
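The stop-queue-after-transmit discipline Herbert recommends can be modelled with a toy ring in plain C (hypothetical names and sizes; MAX_SLOTS_PER_PKT plays the role of the worst-case per-packet descriptor count, roughly MAX_SKB_FRAGS plus overhead in a real driver):

```c
#include <assert.h>
#include <stdbool.h>

#define RING_SIZE         64
#define MAX_SLOTS_PER_PKT 4   /* assumed worst case for one packet */

struct toy_ring {
    int used;            /* slots currently occupied */
    bool queue_stopped;
};

/* The capacity check runs AFTER queueing the packet: if fewer than
 * MAX_SLOTS_PER_PKT slots remain, stop the queue now, so the core
 * never hands us a packet that cannot fit and TX_BUSY never arises. */
static void toy_start_xmit(struct toy_ring *r, int slots)
{
    assert(!r->queue_stopped);            /* core never calls us stopped */
    assert(r->used + slots <= RING_SIZE); /* guaranteed by the idiom */
    r->used += slots;
    if (RING_SIZE - r->used < MAX_SLOTS_PER_PKT)
        r->queue_stopped = true;          /* netif_stop_queue() analogue */
}

/* TX completion frees slots and restarts the queue once a worst-case
 * packet fits again. */
static void toy_tx_complete(struct toy_ring *r, int slots)
{
    r->used -= slots;
    if (RING_SIZE - r->used >= MAX_SLOTS_PER_PKT)
        r->queue_stopped = false;         /* netif_wake_queue() analogue */
}
```

The cost of this scheme is the reservation: the queue stops while up to MAX_SLOTS_PER_PKT - 1 slots are still free, which is the underutilisation argued about later in the thread.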
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-18 7:34 ` Herbert Xu @ 2009-06-19 3:37 ` Rusty Russell 2009-06-19 4:36 ` Herbert Xu 2009-06-19 4:36 ` Herbert Xu 2009-06-19 3:37 ` Rusty Russell 1 sibling, 2 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-19 3:37 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, virtualization, David Miller On Thu, 18 Jun 2009 05:04:22 pm Herbert Xu wrote: > On Thu, Jun 18, 2009 at 04:47:50PM +0930, Rusty Russell wrote: > > Summary: we still have about 54 in-tree drivers which actually use > > NETDEV_TX_BUSY for normal paths. Can I fix it now? > > You can fix it but I don't quite understand your results below :) You didn't comment on my patch which tried to fix NETDEV_TX_BUSY tho? > > sungem.c: Y, N > > This driver does the bug check in addition to a race check that > should simply drop the packet instead of queueing. In fact chances > are the race check is unnecessary anyway. OK, "N" means "can be simply replaced with kfree_skb(skb); return NETDEV_TX_OK;". "Y" means "driver will break if we do that, needs rewriting". I didn't grade how hard or easy the rewrite would be, but later on I got more picky (I would have said this is N, N: the race can be replaced with a drop). > > fs_enet: N > > This is either just a bug check or the driver is broken in that > it should stop the queue when the said condition can be true. > > > mace.c: N > > Just a bug check. Err, that's why they're N (ie. does not need TX_BUSY). > > sh_eth.c: Y > > This driver should check the queue after transmitting, just like > virtio-net :) > > So from a totally non-representative sample of 4, my conclusion > is that none of them need TX_BUSY. Do you have an example that > really needs it? First you asserted "Most of them just do this:... /* Never happens */". Now I've found ~50 drivers which don't do that, it's "Do any of them really need it?". So, now I'll look at that. 
Some are just buggy (I'll send patches for those). Most I just have no idea what they're doing; they're pretty ugly. These ones are interesting: e1000/e1000_main.c: fifo bug workaround? ehea/ehea_main.c: ? starfire.c: "we may not have enough slots even when it seems we do."? tg3.c: tg3_gso_bug ISTR at least one driver claimed practice showed it was better to return TX_BUSY, and one insisted it wasn't going to waste MAX_FRAGS on the stop-early scheme. > Anyway, I don't think we should reshape our APIs based on how > broken the existing users are. We provided an API, people used it. Constantly trying to disclaim our responsibility for the resulting mess makes me fucking ANGRY. We either remove the API, or fix it. I think fixing it is better, because my driver will be simpler and it's obvious no one wants to rewrite 50 drivers and break several of them. I don't know how many times I can say the same thing... Rusty. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-19 3:37 ` Rusty Russell 2009-06-19 4:36 ` Herbert Xu @ 2009-06-19 4:36 ` Herbert Xu 2009-06-19 13:50 ` Rusty Russell ` (3 more replies) 1 sibling, 4 replies; 44+ messages in thread From: Herbert Xu @ 2009-06-19 4:36 UTC (permalink / raw) To: Rusty Russell; +Cc: netdev, virtualization, David Miller, Matt Carlson On Fri, Jun 19, 2009 at 01:07:19PM +0930, Rusty Russell wrote: > > You didn't comment on my patch which tried to fix NETDEV_TX_BUSY tho? I think fixing it would only encourage more drivers to use and abuse TX_BUSY. The fundamental problem with TX_BUSY is that you're doing the check before transmitting a packet instead of after transmitting it. Let me explain why this is wrong, beyond the fact that tcpdump may see the packet twice which you've tried to fix. The problem is that requeueing is fundamentally hard. We used to have this horrible logic in the schedulers to handle this. Thankfully that seems to have been replaced with a single device-level packet holder shared with GSO. However, that is still wrong for many packet schedulers. For example, if the requeued packet is of a lower priority, and a higher priority packet comes along, we want the higher priority packet to preempt the requeued packet. Right now it just doesn't happen. This is not as trivial as it seems because on a busy host this can happen many times a second. With TX_BUSY the QoS guarantees are simply not workable. BTW you pointed out that GSO also uses TX_BUSY, but that is different because the packet schedulers treat a GSO packet as a single entity so there is no issue of preemption. Also tcpdump will never see it twice by design. > e1000/e1000_main.c: fifo bug workaround? The workaround should work just as well as a stop-queue check after packet transmission. > ehea/ehea_main.c: ? Ahh! The bastard LLTX drivers are still around. LLTX was the worst abuse associated with TX_BUSY. 
Thankfully not many of them are left. The fix is to not use LLTX and use the xmit_lock like normal drivers. > starfire.c: "we may not have enough slots even when it seems we do."? Just replace skb_num_frags with SKB_MAX_FRAGS and move the check after the transmit. > tg3.c: tg3_gso_bug A better solution would in fact be to disable hardware TSO when we encounter such a packet (and drop the first one). Because once you get one you're likely to get a lot more. The difference between hardware TSO and GSO on a card like tg3 is negligible anyway. Alternatively just disable TSO completely on those chips. Ccing the tg3 maintainer. > We provided an API, people used it. Constantly trying to disclaim our > responsibility for the resulting mess makes me fucking ANGRY. Where have I disclaimed responsibility? If we were doing that then we wouldn't be having this discussion. > We either remove the API, or fix it. I think fixing it is better, because my > driver will be simpler and it's obvious noone wants to rewrite 50 drivers and > break several of them. My preference is obviously in the long term removal of TX_BUSY. Due to resource constraints that cannot be done immediately. So at least we should try to stop its proliferation. BTW removing TX_BUSY does not mean that your driver has to stay complicated. As I have said repeatedly your driver should be checking the stop-queue condition after transmission, not before. In fact queueing it in the driver is just as bad as return TX_BUSY! Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
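The priority inversion Herbert describes (a requeued packet sitting in the single device-level holder, where later high-priority traffic cannot get ahead of it) can be modelled roughly in plain C. Toy code with assumed names, not the real qdisc implementation:

```c
#include <assert.h>

/* Two priority bands plus a single "requeue holder", standing in for
 * the device-level packet holder shared with GSO. */

enum prio { PRIO_HIGH = 0, PRIO_LOW = 1 };

struct toy_qdisc {
    int requeued;     /* -1 if empty, else priority of the held packet */
    int backlog[2];   /* per-priority packet counts */
};

static void toy_enqueue(struct toy_qdisc *q, enum prio p)
{
    q->backlog[p]++;
}

/* Dequeue for transmission: the requeued packet always goes first,
 * regardless of priority -- this is the inversion. */
static int toy_dequeue(struct toy_qdisc *q)
{
    if (q->requeued != -1) {
        int p = q->requeued;
        q->requeued = -1;
        return p;
    }
    for (int p = PRIO_HIGH; p <= PRIO_LOW; p++)
        if (q->backlog[p]) { q->backlog[p]--; return p; }
    return -1;
}

/* The driver returned TX_BUSY: the packet goes into the holder,
 * not back into the priority bands where it could be reordered. */
static void toy_requeue(struct toy_qdisc *q, int p)
{
    q->requeued = p;
}
```

In this model, a low-priority packet that was requeued is transmitted before a high-priority packet that arrived afterwards, which is exactly the preemption failure being argued.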
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-19 4:36 ` Herbert Xu @ 2009-06-19 13:50 ` Rusty Russell 2009-06-19 14:10 ` Herbert Xu 2009-06-19 14:10 ` Herbert Xu 2009-06-19 13:50 ` Rusty Russell ` (2 subsequent siblings) 3 siblings, 2 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-19 13:50 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, virtualization, David Miller, Matt Carlson On Fri, 19 Jun 2009 02:06:13 pm Herbert Xu wrote: > On Fri, Jun 19, 2009 at 01:07:19PM +0930, Rusty Russell wrote: > > You didn't comment on my patch which tried to fix NETDEV_TX_BUSY tho? > However, that is still wrong for many packet schedulers. For > example, if the requeued packet is of a lower priority, and a > higher priority packet comes along, we want the higher priority > packet to preempt the requeued packet. Right now it just doesn't > happen. > > This is not as trivial as it seems because on a busy host this can > happen many times a second. With TX_BUSY the QoS guarantees are > simply not workable. Your use of the word guarantee here indicates an idealized concept of QoS which cannot exist on any NIC which has a queue. We should try to approach the ideal, but understand we cannot reach it. AFAICT, having a non-resortable head entry in the queue is exactly like having a one-packet-longer queue on the NIC. A little further from the ideal, but actually *less* damaging to the QoS ideal unless it happens on every second packet. On the other hand, we're underutilizing the queue to avoid it. I find that a little embarrassing. > > We provided an API, people used it. Constantly trying to disclaim our > > responsibility for the resulting mess makes me fucking ANGRY. > > Where have I disclaimed responsibility? If we were doing that > then we wouldn't be having this discussion. "Anyway, I don't think we should reshape our APIs based on how broken the existing users are." 
Perhaps I was reading too much into it, but the implication seemed to be that we should blame the driver authors for writing their drivers in what I consider to be the most straightforward and efficient way. I feel we're being horribly deceptive by giving them a nice API, and upon review, telling them "don't use that". And it's been ongoing for far too long. > In fact queueing it in the driver is just as bad as return TX_BUSY! Agreed (modulo the tcpdump issue). And worse, because it's ugly and complex! Thanks, Rusty. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-19 13:50 ` Rusty Russell 2009-06-19 14:10 ` Herbert Xu @ 2009-06-19 14:10 ` Herbert Xu 2009-06-22 2:39 ` Rusty Russell 2009-06-22 2:39 ` Rusty Russell 1 sibling, 2 replies; 44+ messages in thread From: Herbert Xu @ 2009-06-19 14:10 UTC (permalink / raw) To: Rusty Russell; +Cc: netdev, virtualization, David Miller, Matt Carlson On Fri, Jun 19, 2009 at 11:20:44PM +0930, Rusty Russell wrote: > > Your use of the word guarantee here indicates an idealized concept of QoS > which cannot exist on any NIC which has a queue. We should try to approach > the ideal, but understand we cannot reach it. I'm just pointing out that it's better to not have to do this. Since TX_BUSY and requeueing the packet are unnecessary in the first place (we can avoid them by managing the stop-queue action properly), there is no reason to accept their downsides. > On the other hand, we're underutilizing the queue to avoid it. I find that a > little embarrassing. Here's why I think this is not an issue. If your NIC is high bandwidth then your ring is going to have to be huge so the amount that is underutilised (a 64K packet) is tiny. If your NIC is low bandwidth then this is where you often need QoS and in that case you do *NOT* want to fully utilise the HW queue. > I feel we're being horribly deceptive by giving them a nice API, and upon > review, telling them "don't use that". And it's been ongoing for far too > long. If you look at our API documentation it actually says: Return codes: o NETDEV_TX_OK everything ok. o NETDEV_TX_BUSY Cannot transmit packet, try later Usually a bug, means queue start/stop flow control is broken in the driver. Note: the driver must NOT put the skb in its DMA ring. o NETDEV_TX_LOCKED Locking failed, please retry quickly. Only valid when NETIF_F_LLTX is set. So I don't feel too bad :) > > In fact queueing it in the driver is just as bad as return TX_BUSY! 
> > Agreed (modulo the tcpdump issue). And worse, because it's ugly and complex! The right solution is to stop the queue properly. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
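To put rough numbers on the underutilisation both sides are arguing about: assume (illustratively; the exact figure depends on the driver) that a worst-case 64K packet needs about 19 descriptors, which the stop-early scheme keeps in reserve.

```c
#include <assert.h>

/* Illustrative arithmetic only: `reserved` descriptors are assumed
 * to be kept free for one worst-case packet; the return value is the
 * percentage of the ring given up by the stop-early scheme. */
static double reserved_pct(int reserved, int ring_size)
{
    return 100.0 * reserved / ring_size;
}
```

Under that assumption a 1024-entry ring gives up under 2% (Herbert's "tiny"), while a 64-entry ring gives up nearly 30%, which is where Rusty's "few percent of the queue" complaint bites hardest.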
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-19 14:10 ` Herbert Xu @ 2009-06-22 2:39 ` Rusty Russell 2009-06-22 2:39 ` Rusty Russell 1 sibling, 0 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-22 2:39 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, virtualization, David Miller, Matt Carlson On Fri, 19 Jun 2009 11:40:14 pm Herbert Xu wrote: > > On the other hand, we're underutilizing the queue to avoid it. I find > > that a little embarrassing. > > Here's why I think this is not an issue. If your NIC is high > bandwidth then your ring is going to have to be huge so the > amount that is underutilised (a 64K packet) is tiny. If your > NIC is low bandwidth then this is where you often need QoS and > in that case you do *NOT* want to fully utilise the HW queue. Well, we leave that up to the NIC designer. It's retro-justification for throwing away a few percent of the queue, but the QoS issue is even more in the noise. But I will give in now and change virtio_net to use this substandard API :( Thanks, Rusty. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-19 4:36 ` Herbert Xu ` (2 preceding siblings ...) 2009-06-22 5:46 ` Krishna Kumar2 @ 2009-06-22 5:46 ` Krishna Kumar2 2009-06-22 7:34 ` Herbert Xu 2009-06-22 7:34 ` Herbert Xu 3 siblings, 2 replies; 44+ messages in thread From: Krishna Kumar2 @ 2009-06-22 5:46 UTC (permalink / raw) To: Herbert Xu Cc: David Miller, Matt Carlson, netdev, Rusty Russell, virtualization Hi Herbert, > Herbert Xu <herbert@gondor.apana.org.au> wrote on 06/19/2009 10:06:13 AM: > > > We either remove the API, or fix it. I think fixing it is better, because my > > driver will be simpler and it's obvious noone wants to rewrite 50 drivers and > > break several of them. > > My preference is obviously in the long term removal of TX_BUSY. > Due to resource constraints that cannot be done immediately. So > at least we should try to stop its proliferation. > > BTW removing TX_BUSY does not mean that your driver has to stay > complicated. As I have said repeatedly your driver should be > checking the stop-queue condition after transmission, not before. > > In fact queueing it in the driver is just as bad as return TX_BUSY! I was curious about "queueing it in the driver" part: why is this bad? Do you anticipate any performance problems, or does it break QoS, or something else I have missed? thanks, - KK ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-22 7:34 ` Herbert Xu @ 2009-06-22 13:41 ` Krishna Kumar2 2009-06-22 13:41 ` Krishna Kumar2 ` (2 subsequent siblings) 3 siblings, 0 replies; 44+ messages in thread From: Krishna Kumar2 @ 2009-06-22 13:41 UTC (permalink / raw) To: Herbert Xu Cc: David Miller, Matt Carlson, netdev, Rusty Russell, virtualization Thanks Herbert. I thought less processing is required for those skbs queued at the driver (avoiding qdisc_restart, and the repeated calls to dequeue_skb where the skb cached in 'gso_skb' is checked for sendability and put back until the queue is re-enabled) and hence some gain is possible. So far, my testing of queueing in the driver shows good results for some test cases and bad results for others, hence my question on the topic, as I am not able to figure out why some cases test worse. - KK Herbert Xu <herbert@gondor.apana.org.au> wrote on 06/22/2009 01:04:17 PM: > On Mon, Jun 22, 2009 at 11:16:03AM +0530, Krishna Kumar2 wrote: > > > > I was curious about "queueing it in the driver" part: why is this bad? Do > > you anticipate any performance problems, or does it break QoS, or something > > else I have missed? > > Queueing it in the driver is bad because it is no different than > queueing it at the upper layer, which is what will happen when > you return TX_BUSY.
> > Because we've ripped out the qdisc requeueing logic (which is > horribly complex and only existed because of TX_BUSY), it means > that higher priority packets cannot preempt a packet that is queued > in this way. > > Cheers, > -- > Visit Openswan at http://www.openswan.org/ > Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> > Home Page: http://gondor.apana.org.au/~herbert/ > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-22 7:34 ` Herbert Xu 2009-06-22 13:41 ` Krishna Kumar2 2009-06-22 13:41 ` Krishna Kumar2 @ 2009-06-22 18:25 ` Matt Carlson 2009-06-22 18:25 ` Matt Carlson 3 siblings, 0 replies; 44+ messages in thread From: Matt Carlson @ 2009-06-22 18:25 UTC (permalink / raw) To: Herbert Xu Cc: Krishna Kumar2, netdev, Matthew Carlson, virtualization, David Miller On Mon, Jun 22, 2009 at 12:34:17AM -0700, Herbert Xu wrote: > On Mon, Jun 22, 2009 at 11:16:03AM +0530, Krishna Kumar2 wrote: > > > > I was curious about "queueing it in the driver" part: why is this bad? Do > > you > > anticipate any performance problems, or does it break QoS, or something > > else I > > have missed? > > Queueing it in the driver is bad because it is no different than > queueing it at the upper layer, which is what will happen when > you return TX_BUSY. > > Because we've ripped out the qdisc requeueing logic (which is > horribly complex and only existed because of TX_BUSY), it means > that higher priority packets cannot preempt a packet that is queued > in this way. As was said elsewhere, from the driver writer's perspective every packet that has already been submitted (queued) to the hardware cannot be preempted. Slightly extending that logic doesn't seem like that much of a problem, especially if it saves the troublesome requeuing logic higher up. Below is a very rough, untested patch that implements what I am thinking. The patch only allows one tx packet to be stored, so it shouldn't impact any QoS strategy that much. It can even be extended to eliminate the rest of the NETDEV_TX_BUSY return values, if that is the direction everyone wants to go. Do you agree this is a better approach than turning off TSO completely?
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 46a3f86..f061c04 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -4223,6 +4223,8 @@ static inline u32 tg3_tx_avail(struct tg3 *tp) ((tp->tx_prod - tp->tx_cons) & (TG3_TX_RING_SIZE - 1))); } +static int tg3_start_xmit_dma_bug(struct sk_buff *, struct net_device *); + /* Tigon3 never reports partial packet sends. So we do not * need special logic to handle SKBs that have not had all * of their frags sent yet, like SunGEM does. @@ -4272,7 +4274,13 @@ static void tg3_tx(struct tg3 *tp) */ smp_mb(); - if (unlikely(netif_queue_stopped(tp->dev) && + if (tp->tx_skb) { + struct sk_buff *skb = tp->tx_skb; + tp->tx_skb = NULL; + tg3_start_xmit_dma_bug(skb, tp->dev); + } + + if (!tp->tx_skb && unlikely(netif_queue_stopped(tp->dev) && (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH(tp)))) { netif_tx_lock(tp->dev); if (netif_queue_stopped(tp->dev) && @@ -5211,8 +5219,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) /* Estimate the number of fragments in the worst case */ if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { netif_stop_queue(tp->dev); - if (tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3)) - return NETDEV_TX_BUSY; + if (tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3)) { + tp->tx_skb = skb; + return NETDEV_TX_OK; + } netif_wake_queue(tp->dev); } diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index b3347c4..ed1d6ff 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2515,6 +2515,7 @@ struct tg3 { u32 tx_prod; u32 tx_cons; u32 tx_pending; + struct sk_buff *tx_skb; struct tg3_tx_buffer_desc *tx_ring; struct tx_ring_info *tx_buffers; ^ permalink raw reply related [flat|nested] 44+ messages in thread
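The control flow of the tg3 sketch above can be modelled in plain C (a toy model, not the actual driver; all names are illustrative): the stored packet is resubmitted from the tx-completion path, and the queue is only woken once the stash is empty.

```c
#include <assert.h>

/* Toy device: free ring slots, a stopped flag, and a one-deep tx stash. */
struct dev { int free_slots; int stopped; int stash; int has_stash; };

/* If the packet does not fit, stop the queue and keep the packet,
 * reporting success to the stack (the TX_OK-instead-of-TX_BUSY idea). */
static void xmit(struct dev *d, int need)
{
	if (d->free_slots < need) {
		d->stopped = 1;		/* netif_stop_queue() */
		d->stash = need;
		d->has_stash = 1;
		return;
	}
	d->free_slots -= need;
}

/* Completion path: free slots, resubmit any stashed packet first, and
 * only wake the queue once the stash is gone. */
static void tx_done(struct dev *d, int freed)
{
	d->free_slots += freed;
	if (d->has_stash) {
		int need = d->stash;
		d->has_stash = 0;
		xmit(d, need);
	}
	if (!d->has_stash && d->stopped && d->free_slots > 0)
		d->stopped = 0;		/* netif_wake_queue() */
}
```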
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-22 18:25 ` Matt Carlson @ 2009-06-23 2:54 ` Herbert Xu 2009-06-23 2:54 ` Herbert Xu 1 sibling, 0 replies; 44+ messages in thread From: Herbert Xu @ 2009-06-23 2:54 UTC (permalink / raw) To: Matt Carlson; +Cc: Krishna Kumar2, netdev, David Miller, virtualization On Mon, Jun 22, 2009 at 11:25:29AM -0700, Matt Carlson wrote: > > As was said elsewhere, from the driver writer's perspective every packet > that has already been submitted (queued) to the hardware cannot be > preempted. Slightly extending that logic doesn't seem like that much of > a problem, especially if it saves the troublesome requeuing logic higher up. I think this is pretty much the same as returning TX_BUSY :) Do you have access to one of these buggy cards? Can you run some tests to see what the difference between TSO and GSO is on them? Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-18 7:34 ` Herbert Xu 2009-06-19 3:37 ` Rusty Russell @ 2009-06-19 3:37 ` Rusty Russell 1 sibling, 0 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-19 3:37 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, David Miller, virtualization On Thu, 18 Jun 2009 05:04:22 pm Herbert Xu wrote: > On Thu, Jun 18, 2009 at 04:47:50PM +0930, Rusty Russell wrote: > > Summary: we still have about 54 in-tree drivers which actually use > > NETDEV_TX_BUSY for normal paths. Can I fix it now? > > You can fix it but I don't quite understand your results below :) You didn't comment on my patch which tried to fix NETDEV_TX_BUSY tho? > > sungem.c: Y, N > > This driver does the bug check in addition to a race check that > should simply drop the packet instead of queueing. In fact chances > are the race check is unnecessary anyway. OK, "N" means "can be simply replaced with kfree_skb(skb); return NETDEV_TX_OK;". "Y" means "driver will break if we do that, needs rewriting". I didn't grade how hard or easy the rewrite would be, but later on I got more picky (I would have said this is N, N: the race can be replaced with a drop). > > fs_enet: N > > This is either just a bug check or the driver is broken in that > it should stop the queue when the said condition can be true. > > > mace.c: N > > Just a bug check. Err, that's why they're N (ie. does not need TX_BUSY). > > sh_eth.c: Y > > This driver should check the queue after transmitting, just like > virtio-net :) > > So from a totally non-representative sample of 4, my conclusion > is that none of them need TX_BUSY. Do you have an example that > really needs it? First you asserted "Most of them just do this:... /* Never happens */". Now I've found ~50 drivers which don't do that, it's "Do any of them really need it?". So, now I'll look at that. Some are just buggy (I'll send patches for those). 
Most I just have no idea what they're doing; they're pretty ugly. These ones are interesting: e1000/e1000_main.c: fifo bug workaround? ehea/ehea_main.c: ? starfire.c: "we may not have enough slots even when it seems we do."? tg3.c: tg3_gso_bug ISTR at least one driver claimed practice showed it was better to return TX_BUSY, and one insisted it wasn't going to waste MAX_FRAGS on the stop-early scheme. > Anyway, I don't think we should reshape our APIs based on how > broken the existing users are. We provided an API, people used it. Constantly trying to disclaim our responsibility for the resulting mess makes me fucking ANGRY. We either remove the API, or fix it. I think fixing it is better, because my driver will be simpler and it's obvious no one wants to rewrite 50 drivers and break several of them. I don't know how many times I can say the same thing... Rusty. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-13 12:30 ` Rusty Russell 2009-06-14 6:45 ` Herbert Xu @ 2009-06-14 6:45 ` Herbert Xu 1 sibling, 0 replies; 44+ messages in thread From: Herbert Xu @ 2009-06-14 6:45 UTC (permalink / raw) To: Rusty Russell; +Cc: netdev, David Miller, virtualization On Sat, Jun 13, 2009 at 10:00:37PM +0930, Rusty Russell wrote: > > But re your comment that the 67 drivers using TX_BUSY are doing it because of > driver bugs, that's hard to believe. It either hardly ever happens (in which > case just drop the packet), or it happens (in which case we should handle it > correctly). Most of them just do this: start_xmit: if (unlikely(queue is full)) { /* This should never happen. */ return TX_BUSY; } transmit if (queue is full) stop queue Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-03 3:17 ` Rusty Russell 2009-06-08 5:22 ` Herbert Xu @ 2009-06-08 5:22 ` Herbert Xu 1 sibling, 0 replies; 44+ messages in thread From: Herbert Xu @ 2009-06-08 5:22 UTC (permalink / raw) To: Rusty Russell; +Cc: netdev, David Miller, virtualization On Wed, Jun 03, 2009 at 12:47:04PM +0930, Rusty Russell wrote: > > We could figure out if we can take the worst-case packet, and underutilize > our queue. And fix the other *67* drivers. Most of those are for debugging purposes, i.e., they'll never happen unless the driver is buggy. > Of course that doesn't even work, because we return NETDEV_TX_BUSY from dev.c! If and when your driver becomes part of the core and it has to feed into other drivers, then you can use this argument :) > "Hi, core netdevs here. Don't use NETDEV_TX_BUSY. Yeah, we can't figure out > how to avoid it either. But y'know, just hack something together". No you've misunderstood my complaint. I'm not trying to get you to replace NETDEV_TX_BUSY by the equally abhorrent queue in the driver, I'm saying that you should stop the queue before you get a packet that overflows by looking at the amount of free queue space after transmitting each packet. For most drivers this is easy to do. What's so different about virtio-net that makes this impossible? Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 44+ messages in thread
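The pattern Herbert is advocating, stopping the queue right after a transmission leaves too little room for a worst-case packet so that start_xmit is never entered with a ring it cannot serve, can be sketched as a toy ring model. This is userspace C with illustrative names; `MAX_FRAGS` here is an assumed stand-in for the worst-case number of slots one packet may need.

```c
#include <assert.h>

/* Toy TX ring. MAX_FRAGS is an assumed worst case for one packet. */
#define RING_SIZE	8
#define MAX_FRAGS	3

struct ring { int used; int stopped; };

/* Transmit, then stop the queue if a worst-case packet no longer fits.
 * With this ordering the "ring full" branch is unreachable from the
 * stack's side, because the stack only calls in while !stopped. */
static int start_xmit(struct ring *r, int nfrags)
{
	if (r->used + nfrags > RING_SIZE)
		return -1;		/* would have been NETDEV_TX_BUSY */
	r->used += nfrags;		/* "transmit" */
	if (RING_SIZE - r->used < MAX_FRAGS)
		r->stopped = 1;		/* netif_stop_queue() */
	return 0;
}

static void tx_complete(struct ring *r, int nfrags)
{
	r->used -= nfrags;
	if (r->stopped && RING_SIZE - r->used >= MAX_FRAGS)
		r->stopped = 0;		/* netif_wake_queue() */
}
```

The cost Rusty objects to elsewhere in the thread is visible here too: the ring is declared "full" while up to MAX_FRAGS - 1 slots are still free, i.e. the queue is underutilized in exchange for never returning busy.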
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-02 23:45 ` Herbert Xu 2009-06-03 3:17 ` Rusty Russell @ 2009-06-03 3:17 ` Rusty Russell 1 sibling, 0 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-03 3:17 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, David Miller, virtualization On Wed, 3 Jun 2009 09:15:32 am Herbert Xu wrote: > On Tue, Jun 02, 2009 at 11:25:57PM +0930, Rusty Russell wrote: > > Or, we could just "return NETDEV_TX_BUSY;". I like that :) > > No you should fix it so that you check the queue status after > transmitting a packet so we never get into this state in the > first place. We could figure out if we can take the worst-case packet, and underutilize our queue. And fix the other *67* drivers. Of course that doesn't even work, because we return NETDEV_TX_BUSY from dev.c! "Hi, core netdevs here. Don't use NETDEV_TX_BUSY. Yeah, we can't figure out how to avoid it either. But y'know, just hack something together". Herbert, we are *better* than this! How's this? Tested for the virtio_net driver here. [RFC] net: fix double-tcpdump problem with NETDEV_TX_BUSY. Herbert shares a disdain for drivers returning TX_BUSY because network taps see packets twice when it's used. Unfortunately, it's ubiquitous. This patch marks packets by (ab)using the "peeked" bit in the skb. This bit is currently used for packets queued in a socket; we reset it in dev_queue_xmit and set it when we hand the packet to dev_queue_xmit_nit. We also reset it on incoming packets: this is safe, but it might be sufficient to reset it only in the loopback driver?
diff --git a/net/core/dev.c b/net/core/dev.c --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1678,8 +1678,10 @@ int dev_hard_start_xmit(struct sk_buff * int rc; if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) + if (!list_empty(&ptype_all) && !skb->peeked) { dev_queue_xmit_nit(skb, dev); + skb->peeked = true; + } if (netif_needs_gso(dev, skb)) { if (unlikely(dev_gso_segment(skb))) @@ -1796,6 +1798,8 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + skb->peeked = false; + /* GSO will handle the following emulations directly. */ if (netif_needs_gso(dev, skb)) goto gso; @@ -1942,6 +1946,8 @@ int netif_rx(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); + skb->peeked = false; + /* * The code is rearranged so that the path is the most * short when CPU is congested, but is still operating. ^ permalink raw reply [flat|nested] 44+ messages in thread
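The `peeked` trick in the RFC above can be reduced to a toy model (userspace C, not the kernel code; names are illustrative): the bit is cleared when the stack first takes the packet and set the first time the taps see it, so a TX_BUSY retry does not hit the taps a second time.

```c
#include <assert.h>

/* One flag bit per packet, standing in for skb->peeked. */
struct pkt { int peeked; };

static int tap_count;	/* packets delivered to sniffers (dev_queue_xmit_nit) */

/* Hand the packet to the taps only if it has not been seen before, then
 * try the driver; -1 models NETDEV_TX_BUSY followed by a qdisc retry. */
static int hard_start_xmit(struct pkt *p, int ring_full)
{
	if (!p->peeked) {
		tap_count++;
		p->peeked = 1;
	}
	if (ring_full)
		return -1;	/* NETDEV_TX_BUSY: packet will be retried */
	return 0;
}
```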
* Re: [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. 2009-06-02 9:05 ` Herbert Xu 2009-06-02 13:55 ` Rusty Russell @ 2009-06-02 13:55 ` Rusty Russell 1 sibling, 0 replies; 44+ messages in thread From: Rusty Russell @ 2009-06-02 13:55 UTC (permalink / raw) To: Herbert Xu; +Cc: netdev, virtualization On Tue, 2 Jun 2009 06:35:52 pm Herbert Xu wrote: > On Fri, May 29, 2009 at 11:46:04PM +0930, Rusty Russell wrote: > > This effectively reverts 99ffc696d10b28580fe93441d627cf290ac4484c > > "virtio: wean net driver off NETDEV_TX_BUSY". > > > > The complexity of queuing an skb (setting a tasklet to re-xmit) is > > questionable, especially once we get rid of the other reason for the > > tasklet in the next patch. > > > > If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY. It > > might be frowned upon, but it's common and not going away any time > > soon. > > This looks like a step backwards to me. If we have to do this > to fix a bug, sure. But just doing it for the sake of it smells > wrong. I disagree. We've introduced a third queue, inside the driver, one element long. That feels terribly, terribly hacky and wrong. What do we do if it overflows? Discard the packet, even if we have room in the tx queue. And when do we flush this queue? Well, that's a bit messy. Obviously we need to enable the tx interrupt when the tx queue is full, but we can't just netif_wake_queue, we have to also flush the queue. We can't do that in an irq handler, since we need to block the normal tx path (or introduce another lock and disable interrupts in our xmit routine). So we add a tasklet to do this re-transmission. Or, we could just "return NETDEV_TX_BUSY;". I like that :) Hope that clarifies, Rusty. > > Cheers, ^ permalink raw reply [flat|nested] 44+ messages in thread
end of thread, other threads:[~2009-06-23 2:55 UTC | newest] Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2009-05-29 14:16 [PATCH 2/4] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb Rusty Russell 2009-06-02 8:07 ` Mark McLoughlin 2009-06-02 14:04 ` Rusty Russell 2009-06-02 14:04 ` Rusty Russell 2009-06-02 8:07 ` Mark McLoughlin 2009-06-02 9:05 ` Herbert Xu 2009-06-02 9:05 ` Herbert Xu 2009-06-02 13:55 ` Rusty Russell 2009-06-02 23:45 ` Herbert Xu 2009-06-02 23:45 ` Herbert Xu 2009-06-03 3:17 ` Rusty Russell 2009-06-08 5:22 ` Herbert Xu 2009-06-13 12:30 ` Rusty Russell 2009-06-13 12:30 ` Rusty Russell 2009-06-14 6:45 ` Herbert Xu 2009-06-18 7:17 ` Rusty Russell 2009-06-18 7:17 ` Rusty Russell 2009-06-18 7:34 ` Herbert Xu 2009-06-18 7:34 ` Herbert Xu 2009-06-19 3:37 ` Rusty Russell 2009-06-19 4:36 ` Herbert Xu 2009-06-19 4:36 ` Herbert Xu 2009-06-19 13:50 ` Rusty Russell 2009-06-19 14:10 ` Herbert Xu 2009-06-19 14:10 ` Herbert Xu 2009-06-22 2:39 ` Rusty Russell 2009-06-22 2:39 ` Rusty Russell 2009-06-19 13:50 ` Rusty Russell 2009-06-22 5:46 ` Krishna Kumar2 2009-06-22 5:46 ` Krishna Kumar2 2009-06-22 7:34 ` Herbert Xu 2009-06-22 7:34 ` Herbert Xu 2009-06-22 13:41 ` Krishna Kumar2 2009-06-22 13:41 ` Krishna Kumar2 2009-06-22 18:25 ` Matt Carlson 2009-06-22 18:25 ` Matt Carlson 2009-06-23 2:54 ` Herbert Xu 2009-06-23 2:54 ` Herbert Xu 2009-06-19 3:37 ` Rusty Russell 2009-06-14 6:45 ` Herbert Xu 2009-06-08 5:22 ` Herbert Xu 2009-06-03 3:17 ` Rusty Russell 2009-06-02 13:55 ` Rusty Russell -- strict thread matches above, loose matches on Subject: below -- 2009-05-29 14:16 Rusty Russell