All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] sunvnet: improve error handling when a remote crashes
@ 2015-01-26 19:10 David L Stevens
  2015-01-26 19:48 ` David L Stevens
  2015-01-26 19:54 ` Sowmini Varadhan
  0 siblings, 2 replies; 7+ messages in thread
From: David L Stevens @ 2015-01-26 19:10 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sowmini Varadhan

If a remote machine crashes while there are pending transmit buffers, the
sunvnet driver reallocates the ring descriptors giving us enries that have
state VIO_DESC_FREE but also an allocated skb. This results in a BUG_ON()
call when the remote reboots and we reach that point in the ring.

This patch:

1) clears pending tx packets in the ring on port reset
2) changes a BUG_ON() to a pr_warn() when a remote host has given us an invalid
	descriptor state
3) collapses multiple active buffer frees in a ring to a single message per
	ring and adds the device name and remote MAC address

This fixes the particular problem of not cleaning up pending buffers on a
reset, but also prevents us from crashing if the remote handles descriptors
out of order or sets an unexpected state for a descriptor.

Signed-off-by: David L Stevens <david.stevens@oracle.com>
---
 drivers/net/ethernet/sun/sunvnet.c |   68 +++++++++++++++++++++---------------
 1 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index fe044f3..3733ae6 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -50,6 +50,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
 #define	VNET_MAX_RETRIES	10
 
 static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
+static void vnet_port_reset(struct vnet_port *port);
 
 /* Ordered from largest major to lowest */
 static struct vio_version vnet_versions[] = {
@@ -736,9 +737,7 @@ ldc_ctrl:
 		vio_link_state_change(vio, event);
 
 		if (event == LDC_EVENT_RESET) {
-			port->rmtu = 0;
-			port->tso = true;
-			port->tsolen = 0;
+			vnet_port_reset(port);
 			vio_port_up(vio);
 		}
 		port->rx_event = 0;
@@ -934,36 +933,36 @@ static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
 
 	*pending = 0;
 
-	txi = dr->prod-1;
-	if (txi < 0)
-		txi = VNET_TX_RING_SIZE-1;
-
+	txi = dr->prod;
 	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
 		struct vio_net_desc *d;
 
-		d = vio_dring_entry(dr, txi);
-
-		if (d->hdr.state == VIO_DESC_DONE) {
-			if (port->tx_bufs[txi].skb) {
-				BUG_ON(port->tx_bufs[txi].skb->next);
+		--txi;
+		if (txi < 0)
+			txi = VNET_TX_RING_SIZE-1;
 
-				port->tx_bufs[txi].skb->next = skb;
-				skb = port->tx_bufs[txi].skb;
-				port->tx_bufs[txi].skb = NULL;
+		d = vio_dring_entry(dr, txi);
 
-				ldc_unmap(port->vio.lp,
-					  port->tx_bufs[txi].cookies,
-					  port->tx_bufs[txi].ncookies);
-			}
-			d->hdr.state = VIO_DESC_FREE;
-		} else if (d->hdr.state == VIO_DESC_READY) {
+		if (d->hdr.state == VIO_DESC_READY) {
 			(*pending)++;
-		} else if (d->hdr.state == VIO_DESC_FREE) {
-			break;
+			continue;
 		}
-		--txi;
-		if (txi < 0)
-			txi = VNET_TX_RING_SIZE-1;
+		if (port->tx_bufs[txi].skb) {
+			if (d->hdr.state != VIO_DESC_DONE)
+				pr_warn("invalid ring buffer state %d\n",
+					d->hdr.state);
+			BUG_ON(port->tx_bufs[txi].skb->next);
+
+			port->tx_bufs[txi].skb->next = skb;
+			skb = port->tx_bufs[txi].skb;
+			port->tx_bufs[txi].skb = NULL;
+
+			ldc_unmap(port->vio.lp,
+				  port->tx_bufs[txi].cookies,
+				  port->tx_bufs[txi].ncookies);
+		} else if (d->hdr.state == VIO_DESC_FREE)
+			break;
+		d->hdr.state = VIO_DESC_FREE;
 	}
 	return skb;
 }
@@ -1633,7 +1632,9 @@ static const struct ethtool_ops vnet_ethtool_ops = {
 
 static void vnet_port_free_tx_bufs(struct vnet_port *port)
 {
+	struct net_device *dev = port->vp->dev;
 	struct vio_dring_state *dr;
+	bool active_freed = false;
 	int i;
 
 	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
@@ -1649,8 +1650,7 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
 			continue;
 
 		d = vio_dring_entry(dr, i);
-		if (d->hdr.state == VIO_DESC_READY)
-			pr_warn("active transmit buffers freed\n");
+		active_freed |= d->hdr.state == VIO_DESC_READY;
 
 		ldc_unmap(port->vio.lp,
 			  port->tx_bufs[i].cookies,
@@ -1662,6 +1662,9 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
 	ldc_free_exp_dring(port->vio.lp, dr->base,
 			   (dr->entry_size * dr->num_entries),
 			   dr->cookies, dr->ncookies);
+	if (active_freed)
+		pr_warn("%s: active transmit buffers freed for remote %pM\n",
+			dev->name, port->raddr);
 	dr->base = NULL;
 	dr->entry_size = 0;
 	dr->num_entries = 0;
@@ -1669,6 +1672,15 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
 	dr->ncookies = 0;
 }
 
+static void vnet_port_reset(struct vnet_port *port)
+{
+	del_timer(&port->clean_timer);
+	vnet_port_free_tx_bufs(port);
+	port->rmtu = 0;
+	port->tso = true;
+	port->tsolen = 0;
+}
+
 static int vnet_port_alloc_tx_ring(struct vnet_port *port)
 {
 	struct vio_dring_state *dr;
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] sunvnet: improve error handling when a remote crashes
  2015-01-26 19:10 [PATCH net-next] sunvnet: improve error handling when a remote crashes David L Stevens
@ 2015-01-26 19:48 ` David L Stevens
  2015-01-26 19:54 ` Sowmini Varadhan
  1 sibling, 0 replies; 7+ messages in thread
From: David L Stevens @ 2015-01-26 19:48 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sowmini Varadhan

This patch has a dependency on another one in my tree, so it won't apply; I'll
resubmit as a set.

							+-DLS

On 01/26/2015 02:10 PM, David L Stevens wrote:
> If a remote machine crashes while there are pending transmit buffers, the
> sunvnet driver reallocates the ring descriptors giving us enries that have
> state VIO_DESC_FREE but also an allocated skb. This results in a BUG_ON()
> call when the remote reboots and we reach that point in the ring.
> 
> This patch:
> 
> 1) clears pending tx packets in the ring on port reset
> 2) changes a BUG_ON() to a pr_warn() when a remote host has given us an invalid
> 	descriptor state
> 3) collapses multiple active buffer frees in a ring to a single message per
> 	ring and adds the device name and remote MAC address
> 
> This fixes the particular problem of not cleaning up pending buffers on a
> reset, but also prevents us from crashing if the remote handles descriptors
> out of order or sets an unexpected state for a descriptor.
> 
> Signed-off-by: David L Stevens <david.stevens@oracle.com>
> ---
>  drivers/net/ethernet/sun/sunvnet.c |   68 +++++++++++++++++++++---------------
>  1 files changed, 40 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
> index fe044f3..3733ae6 100644
> --- a/drivers/net/ethernet/sun/sunvnet.c
> +++ b/drivers/net/ethernet/sun/sunvnet.c
> @@ -50,6 +50,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
>  #define	VNET_MAX_RETRIES	10
>  
>  static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
> +static void vnet_port_reset(struct vnet_port *port);
>  
>  /* Ordered from largest major to lowest */
>  static struct vio_version vnet_versions[] = {
> @@ -736,9 +737,7 @@ ldc_ctrl:
>  		vio_link_state_change(vio, event);
>  
>  		if (event == LDC_EVENT_RESET) {
> -			port->rmtu = 0;
> -			port->tso = true;
> -			port->tsolen = 0;
> +			vnet_port_reset(port);
>  			vio_port_up(vio);
>  		}
>  		port->rx_event = 0;
> @@ -934,36 +933,36 @@ static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
>  
>  	*pending = 0;
>  
> -	txi = dr->prod-1;
> -	if (txi < 0)
> -		txi = VNET_TX_RING_SIZE-1;
> -
> +	txi = dr->prod;
>  	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
>  		struct vio_net_desc *d;
>  
> -		d = vio_dring_entry(dr, txi);
> -
> -		if (d->hdr.state == VIO_DESC_DONE) {
> -			if (port->tx_bufs[txi].skb) {
> -				BUG_ON(port->tx_bufs[txi].skb->next);
> +		--txi;
> +		if (txi < 0)
> +			txi = VNET_TX_RING_SIZE-1;
>  
> -				port->tx_bufs[txi].skb->next = skb;
> -				skb = port->tx_bufs[txi].skb;
> -				port->tx_bufs[txi].skb = NULL;
> +		d = vio_dring_entry(dr, txi);
>  
> -				ldc_unmap(port->vio.lp,
> -					  port->tx_bufs[txi].cookies,
> -					  port->tx_bufs[txi].ncookies);
> -			}
> -			d->hdr.state = VIO_DESC_FREE;
> -		} else if (d->hdr.state == VIO_DESC_READY) {
> +		if (d->hdr.state == VIO_DESC_READY) {
>  			(*pending)++;
> -		} else if (d->hdr.state == VIO_DESC_FREE) {
> -			break;
> +			continue;
>  		}
> -		--txi;
> -		if (txi < 0)
> -			txi = VNET_TX_RING_SIZE-1;
> +		if (port->tx_bufs[txi].skb) {
> +			if (d->hdr.state != VIO_DESC_DONE)
> +				pr_warn("invalid ring buffer state %d\n",
> +					d->hdr.state);
> +			BUG_ON(port->tx_bufs[txi].skb->next);
> +
> +			port->tx_bufs[txi].skb->next = skb;
> +			skb = port->tx_bufs[txi].skb;
> +			port->tx_bufs[txi].skb = NULL;
> +
> +			ldc_unmap(port->vio.lp,
> +				  port->tx_bufs[txi].cookies,
> +				  port->tx_bufs[txi].ncookies);
> +		} else if (d->hdr.state == VIO_DESC_FREE)
> +			break;
> +		d->hdr.state = VIO_DESC_FREE;
>  	}
>  	return skb;
>  }
> @@ -1633,7 +1632,9 @@ static const struct ethtool_ops vnet_ethtool_ops = {
>  
>  static void vnet_port_free_tx_bufs(struct vnet_port *port)
>  {
> +	struct net_device *dev = port->vp->dev;
>  	struct vio_dring_state *dr;
> +	bool active_freed = false;
>  	int i;
>  
>  	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
> @@ -1649,8 +1650,7 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
>  			continue;
>  
>  		d = vio_dring_entry(dr, i);
> -		if (d->hdr.state == VIO_DESC_READY)
> -			pr_warn("active transmit buffers freed\n");
> +		active_freed |= d->hdr.state == VIO_DESC_READY;
>  
>  		ldc_unmap(port->vio.lp,
>  			  port->tx_bufs[i].cookies,
> @@ -1662,6 +1662,9 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
>  	ldc_free_exp_dring(port->vio.lp, dr->base,
>  			   (dr->entry_size * dr->num_entries),
>  			   dr->cookies, dr->ncookies);
> +	if (active_freed)
> +		pr_warn("%s: active transmit buffers freed for remote %pM\n",
> +			dev->name, port->raddr);
>  	dr->base = NULL;
>  	dr->entry_size = 0;
>  	dr->num_entries = 0;
> @@ -1669,6 +1672,15 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
>  	dr->ncookies = 0;
>  }
>  
> +static void vnet_port_reset(struct vnet_port *port)
> +{
> +	del_timer(&port->clean_timer);
> +	vnet_port_free_tx_bufs(port);
> +	port->rmtu = 0;
> +	port->tso = true;
> +	port->tsolen = 0;
> +}
> +
>  static int vnet_port_alloc_tx_ring(struct vnet_port *port)
>  {
>  	struct vio_dring_state *dr;
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] sunvnet: improve error handling when a remote crashes
  2015-01-26 19:10 [PATCH net-next] sunvnet: improve error handling when a remote crashes David L Stevens
  2015-01-26 19:48 ` David L Stevens
@ 2015-01-26 19:54 ` Sowmini Varadhan
  2015-01-26 20:19   ` David L Stevens
  1 sibling, 1 reply; 7+ messages in thread
From: Sowmini Varadhan @ 2015-01-26 19:54 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev




> @@ -934,36 +933,36 @@ static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
>  
>  	*pending = 0;
>  
> -	txi = dr->prod-1;
> -	if (txi < 0)
> -		txi = VNET_TX_RING_SIZE-1;
> -
> +	txi = dr->prod;

As I understand it, this starts at dr->prod and goes through all
descriptors, cleaning up !READY descriptors as it goes around.

I think you'll have a higher reclaim rate for finding !READY if you
started at dr->cons instead: dr->cons is the one that was last ACK'ed,
and that ack would only have been sent after the peer had marked the 
descriptor as DONE. (consumer would have had a chance to read more
descriptors, by the time the tx-reclaim loop goes around) 

> +		if (port->tx_bufs[txi].skb) {
> +			if (d->hdr.state != VIO_DESC_DONE)
> +				pr_warn("invalid ring buffer state %d\n",
> +					d->hdr.state);

I would even suggest skipping the pr_warn (maybe make it a viodbg
instead) as it might alarm the end-user (who cannot really do 
anything about it other than call us anyway :-)).

>  			   dr->cookies, dr->ncookies);
> +	if (active_freed)
> +		pr_warn("%s: active transmit buffers freed for remote %pM\n",
> +			dev->name, port->raddr);

Same comment as above.

In general, I think we need some sysfs/ethtool  bean-counters/statistics
for sunvnet, to keep track of this sort of thing efficiently in 
a production env without triggering red-herrings calls.

--Sowmini

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] sunvnet: improve error handling when a remote crashes
  2015-01-26 19:54 ` Sowmini Varadhan
@ 2015-01-26 20:19   ` David L Stevens
  2015-01-26 20:29     ` Sowmini Varadhan
  0 siblings, 1 reply; 7+ messages in thread
From: David L Stevens @ 2015-01-26 20:19 UTC (permalink / raw)
  To: Sowmini Varadhan; +Cc: David Miller, netdev



On 01/26/2015 02:54 PM, Sowmini Varadhan wrote:
> 
> 
> 
>> @@ -934,36 +933,36 @@ static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
>>  
>>  	*pending = 0;
>>  
>> -	txi = dr->prod-1;
>> -	if (txi < 0)
>> -		txi = VNET_TX_RING_SIZE-1;
>> -
>> +	txi = dr->prod;
> 
> As I understand it, this starts at dr->prod and goes through all
> descriptors, cleaning up !READY descriptors as it goes around.
> 
> I think you'll have a higher reclaim rate for finding !READY if you
> started at dr->cons instead: dr->cons is the one that was last ACK'ed,
> and that ack would only have been sent after the peer had marked the 
> descriptor as DONE. (consumer would have had a chance to read more
> descriptors, by the time the tx-reclaim loop goes around) 

	Actually, it starts at dr->prod-1, and it stops as soon as it
gets an already-freed descriptor. It could possibly free something
marked as DONE before we've been acked (ie, faster), but it could look
at stuff it doesn't have to. I wanted to be conservative here, though,
because we don't want to miss any and have unfreed data until we go
around the ring. Any pending (in any state) would begin at dr->prod-1,
so there can't be any races with an ACK there.
	At any rate, the intent for this patch is to check and free
skbs in states other than just VIO_DESC_READY and not requiring that
it be VIO_DESC_DONE when there is an allocated buffer associated with it.
The descriptors indices we look at are unchanged by this patch.

> 
>> +		if (port->tx_bufs[txi].skb) {
>> +			if (d->hdr.state != VIO_DESC_DONE)
>> +				pr_warn("invalid ring buffer state %d\n",
>> +					d->hdr.state);
> 
> I would even suggest skipping the pr_warn (maybe make it a viodbg
> instead) as it might alarm the end-user (who cannot really do 
> anything about it other than call us anyway :-)).

	It should only happen if there is active traffic when a remote
crashes, but if a remote is giving us garbage in other cases, I think an
admin would want to check that out.
	A crash based on the behavior of a remote was definitely too much, and
warnings can be turned off too. viodbg() would be useless here-- you wouldn't
turn it on to find these, and if these are occurring without a transmitter
crashing, it'd indicate something more serious.
	So, I think this one should stay (as a warning, which can also be turned
off). The next one is debatable, since it really is an ordinary reset or shutdown
situation and could be removed entirely. I changed the existing warning to at
least be done only once per ring and give some more info, but removing it entirely
seems reasonable to me too.

> 
>>  			   dr->cookies, dr->ncookies);
>> +	if (active_freed)
>> +		pr_warn("%s: active transmit buffers freed for remote %pM\n",
>> +			dev->name, port->raddr);
> 
> Same comment as above.

						+-DLS

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] sunvnet: improve error handling when a remote crashes
  2015-01-26 20:19   ` David L Stevens
@ 2015-01-26 20:29     ` Sowmini Varadhan
  2015-01-26 20:53       ` David L Stevens
  0 siblings, 1 reply; 7+ messages in thread
From: Sowmini Varadhan @ 2015-01-26 20:29 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev

On 01/26/2015 03:19 PM, David L Stevens wrote:

>> I think you'll have a higher reclaim rate for finding !READY if you
>> started at dr->cons instead: dr->cons is the one that was last ACK'ed,
>> and that ack would only have been sent after the peer had marked the
>> descriptor as DONE. (consumer would have had a chance to read more
>> descriptors, by the time the tx-reclaim loop goes around)
   :
> The descriptors indices we look at are unchanged by this patch.

Understood, and what I suggested was an optimization for tx-reclaim.


> 	It should only happen if there is active traffic when a remote
> crashes, but if a remote is giving us garbage in other cases, I think an
> admin would want to check that out.

This allows one DomU to spam up everyone else's log file by crashing
periodically.

In any case, the contents of the message do not in any way suggest
that an admin (of the DomU, who probably has no control over
other DomU's anyway) should go and baby-sit some other DomU that
just crashed.

ymmv.

--Sowmini

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] sunvnet: improve error handling when a remote crashes
  2015-01-26 20:29     ` Sowmini Varadhan
@ 2015-01-26 20:53       ` David L Stevens
  2015-01-26 22:45         ` Sowmini Varadhan
  0 siblings, 1 reply; 7+ messages in thread
From: David L Stevens @ 2015-01-26 20:53 UTC (permalink / raw)
  To: sowmini.varadhan; +Cc: David Miller, netdev



On 01/26/2015 03:29 PM, Sowmini Varadhan wrote:

>>     It should only happen if there is active traffic when a remote
>> crashes, but if a remote is giving us garbage in other cases, I think an
>> admin would want to check that out.
> 
> This allows one DomU to spam up everyone else's log file by crashing
> periodically.
> 
> In any case, the contents of the message do not in any way suggest
> that an admin (of the DomU, who probably has no control over
> other DomU's anyway) should go and baby-sit some other DomU that
> just crashed.

I demoted it to a notice and we can change it to a counter when we add
stats. This way it's clear that it's an unexpected state and it's easy
to see them when we want them.

						+-DLS

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] sunvnet: improve error handling when a remote crashes
  2015-01-26 20:53       ` David L Stevens
@ 2015-01-26 22:45         ` Sowmini Varadhan
  0 siblings, 0 replies; 7+ messages in thread
From: Sowmini Varadhan @ 2015-01-26 22:45 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev

On (01/26/15 15:53), David L Stevens wrote:
> 
> I demoted it to a notice and we can change it to a counter when we add
> stats. This way it's clear that it's an unexpected state and it's easy
> to see them when we want them.

fair enough.

--Sowmini

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-01-26 22:45 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-26 19:10 [PATCH net-next] sunvnet: improve error handling when a remote crashes David L Stevens
2015-01-26 19:48 ` David L Stevens
2015-01-26 19:54 ` Sowmini Varadhan
2015-01-26 20:19   ` David L Stevens
2015-01-26 20:29     ` Sowmini Varadhan
2015-01-26 20:53       ` David L Stevens
2015-01-26 22:45         ` Sowmini Varadhan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.