All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] bnx2x: guard stats_pending with stats_lock in all cases
@ 2013-08-12 19:08 Neal Cardwell
  2013-08-12 20:52 ` Dmitry Kravkov
  0 siblings, 1 reply; 3+ messages in thread
From: Neal Cardwell @ 2013-08-12 19:08 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Neal Cardwell, Eilon Greenstein, Vladislav Zolotarov,
	Yaniv Rosner, Merav Sicron, Eric Dumazet, Tom Herbert,
	Havard Skinnemoen, Sanjay Hortikar

As 9bcb8018cf ("bnx2x: Fix the race on bp->stats_pending") noted, we
can have a race on bp->stats_pending between the timer and a
STATS_EVENT_LINK_UP event handler.

But it seems we can also have the following two races on
bp->stats_pending between the timer and a STATS_EVENT_STOP handler:

Scenario A:
-----------

thread 1:  bnx2x_timer() thread in bnx2x_storm_stats_post():
		/* send FW stats ramrod */
		rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
				   U64_HI(bp->fw_stats_req_mapping),
				   U64_LO(bp->fw_stats_req_mapping),
				   NONE_CONNECTION_TYPE);
		if (rc == 0)

thread 2: ethtool thread:
bnx2x_stats_handle(bp, STATS_EVENT_STOP)
  -> bnx2x_stats_stop()
    -> bnx2x_storm_stats_update()
	bp->stats_pending = 0;

thread 1:  bnx2x_timer() thread in bnx2x_storm_stats_post():
			bp->stats_pending = 1;

If the action interleaves in this way we end up with a
bp->stats_pending value of 1 when there are no stats pending.

Scenario B:
-----------
thread 1:  bnx2x_timer() thread in bnx2x_stats_update():
		if (bnx2x_storm_stats_update(bp)) {
			if (bp->stats_pending == 3) { // false
			reg1 = bp->stats_pending      // reg1=1

thread 2: ethtool thread:
bnx2x_stats_handle(bp, STATS_EVENT_STOP)
  -> bnx2x_stats_stop()
    -> bnx2x_storm_stats_update()
	bp->stats_pending = 0;

thread 1:  bnx2x_timer() thread in bnx2x_stats_update():
			reg1++				// reg1=2
			bp->stats_pending = reg1;	// stats_pending=2
			return;

If the action interleaves in this way we end up with a non-zero
bp->stats_pending value when there are no stats pending.

-----------

These are suspected as possible contributors to occasional
bnx2x_panic() dumps we see of the form:

 storm stats were not updated for 3 times

When these panic messages happen we see the NIC hang.

The proposed fix is for all, not just some, of the reads and writes to
stats_pending to be guarded by stats_lock. So now all accesses to
stats_pending and stats_couner are guarded by stats_lock.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Vladislav Zolotarov <vladz@broadcom.com>
Cc: Yaniv Rosner <yanivr@broadcom.com>
Cc: Merav Sicron <meravs@broadcom.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Havard Skinnemoen <hskinnemoen@google.com>
Cc: Sanjay Hortikar <horti@google.com>
Change-Id: Ic389c4355ef4a3e01da8f1f4c2019e75dbb5ddf6
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 47 +++++++++++++----------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
index 98366ab..d72a630 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
@@ -123,16 +123,11 @@ static void bnx2x_dp_stats(struct bnx2x *bp)
  */
 static void bnx2x_storm_stats_post(struct bnx2x *bp)
 {
+	spin_lock_bh(&bp->stats_lock);
+
 	if (!bp->stats_pending) {
 		int rc;
 
-		spin_lock_bh(&bp->stats_lock);
-
-		if (bp->stats_pending) {
-			spin_unlock_bh(&bp->stats_lock);
-			return;
-		}
-
 		bp->fw_stats_req->hdr.drv_stats_counter =
 			cpu_to_le16(bp->stats_counter++);
 
@@ -151,8 +146,9 @@ static void bnx2x_storm_stats_post(struct bnx2x *bp)
 		if (rc == 0)
 			bp->stats_pending = 1;
 
-		spin_unlock_bh(&bp->stats_lock);
 	}
+
+	spin_unlock_bh(&bp->stats_lock);
 }
 
 static void bnx2x_hw_stats_post(struct bnx2x *bp)
@@ -888,9 +884,7 @@ static int bnx2x_storm_stats_validate_counters(struct bnx2x *bp)
 	/* Make sure we use the value of the counter
 	 * used for sending the last stats ramrod.
 	 */
-	spin_lock_bh(&bp->stats_lock);
 	cur_stats_counter = bp->stats_counter - 1;
-	spin_unlock_bh(&bp->stats_lock);
 
 	/* are storm stats valid? */
 	if (le16_to_cpu(counters->xstats_counter) != cur_stats_counter) {
@@ -923,7 +917,8 @@ static int bnx2x_storm_stats_validate_counters(struct bnx2x *bp)
 	return 0;
 }
 
-static int bnx2x_storm_stats_update(struct bnx2x *bp)
+static int bnx2x_storm_stats_update(struct bnx2x *bp,
+				    bool track_pending)
 {
 	struct tstorm_per_port_stats *tport =
 				&bp->fw_stats_data->port.tstorm_port_statistics;
@@ -934,9 +929,24 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
 	struct bnx2x_eth_stats_old *estats_old = &bp->eth_stats_old;
 	int i;
 
+	spin_lock_bh(&bp->stats_lock);
+
 	/* vfs stat counter is managed by pf */
-	if (IS_PF(bp) && bnx2x_storm_stats_validate_counters(bp))
+	if (IS_PF(bp) && bnx2x_storm_stats_validate_counters(bp)) {
+		bool pending_timeout = false;
+
+		if (track_pending && (bp->stats_pending++ == 3))
+			pending_timeout = true;
+
+		spin_unlock_bh(&bp->stats_lock);
+
+		if (pending_timeout) {
+			BNX2X_ERR("storm stats were not updated for 3 times\n");
+			bnx2x_panic();
+		}
+
 		return -EAGAIN;
+	}
 
 	estats->error_bytes_received_hi = 0;
 	estats->error_bytes_received_lo = 0;
@@ -1118,6 +1128,8 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
 
 	bp->stats_pending = 0;
 
+	spin_unlock_bh(&bp->stats_lock);
+
 	return 0;
 }
 
@@ -1237,18 +1249,13 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 		if (bp->port.pmf)
 			bnx2x_hw_stats_update(bp);
 
-		if (bnx2x_storm_stats_update(bp)) {
-			if (bp->stats_pending++ == 3) {
-				BNX2X_ERR("storm stats were not updated for 3 times\n");
-				bnx2x_panic();
-			}
+		if (bnx2x_storm_stats_update(bp, true))
 			return;
-		}
 	} else {
 		/* vf doesn't collect HW statistics, and doesn't get completions
 		 * perform only update
 		 */
-		bnx2x_storm_stats_update(bp);
+		bnx2x_storm_stats_update(bp, false);
 	}
 
 	bnx2x_net_stats_update(bp);
@@ -1337,7 +1344,7 @@ static void bnx2x_stats_stop(struct bnx2x *bp)
 	if (bp->port.pmf)
 		update = (bnx2x_hw_stats_update(bp) == 0);
 
-	update |= (bnx2x_storm_stats_update(bp) == 0);
+	update |= (bnx2x_storm_stats_update(bp, false) == 0);
 
 	if (update) {
 		bnx2x_net_stats_update(bp);
-- 
1.8.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH net] bnx2x: guard stats_pending with stats_lock in all cases
  2013-08-12 19:08 [PATCH net] bnx2x: guard stats_pending with stats_lock in all cases Neal Cardwell
@ 2013-08-12 20:52 ` Dmitry Kravkov
  2013-08-13 17:57   ` Neal Cardwell
  0 siblings, 1 reply; 3+ messages in thread
From: Dmitry Kravkov @ 2013-08-12 20:52 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: David Miller, netdev, Eilon Greenstein, Vladislav Zolotarov,
	Yaniv Rosner, Merav Sicron, Eric Dumazet, Tom Herbert,
	Havard Skinnemoen, Sanjay Hortikar, Dmitry Kravkov

On Mon, Aug 12, 2013 at 10:08 PM, Neal Cardwell <ncardwell@google.com> wrote:
>
> As 9bcb8018cf ("bnx2x: Fix the race on bp->stats_pending") noted, we
> can have a race on bp->stats_pending between the timer and a
> STATS_EVENT_LINK_UP event handler.
>
> But it seems we can also have the following two races on
> bp->stats_pending between the timer and a STATS_EVENT_STOP handler:
>
> Scenario A:
> -----------
>
> thread 1:  bnx2x_timer() thread in bnx2x_storm_stats_post():
>                 /* send FW stats ramrod */
>                 rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
>                                    U64_HI(bp->fw_stats_req_mapping),
>                                    U64_LO(bp->fw_stats_req_mapping),
>                                    NONE_CONNECTION_TYPE);
>                 if (rc == 0)
>
> thread 2: ethtool thread:
> bnx2x_stats_handle(bp, STATS_EVENT_STOP)
>   -> bnx2x_stats_stop()
>     -> bnx2x_storm_stats_update()
>         bp->stats_pending = 0;
>
> thread 1:  bnx2x_timer() thread in bnx2x_storm_stats_post():
>                         bp->stats_pending = 1;
>
> If the action interleaves in this way we end up with a
> bp->stats_pending value of 1 when there are no stats pending.
>
> Scenario B:
> -----------
> thread 1:  bnx2x_timer() thread in bnx2x_stats_update():
>                 if (bnx2x_storm_stats_update(bp)) {
>                         if (bp->stats_pending == 3) { // false
>                         reg1 = bp->stats_pending      // reg1=1
>
> thread 2: ethtool thread:
> bnx2x_stats_handle(bp, STATS_EVENT_STOP)
>   -> bnx2x_stats_stop()
>     -> bnx2x_storm_stats_update()
>         bp->stats_pending = 0;
>
> thread 1:  bnx2x_timer() thread in bnx2x_stats_update():
>                         reg1++                          // reg1=2
>                         bp->stats_pending = reg1;       // stats_pending=2
>                         return;
>
> If the action interleaves in this way we end up with a non-zero
> bp->stats_pending value when there are no stats pending.
>
> -----------
>
> These are suspected as possible contributors to occasional
> bnx2x_panic() dumps we see of the form:
>
>  storm stats were not updated for 3 times
>
> When these panic messages happen we see the NIC hang.
>
> The proposed fix is for all, not just some, of the reads and writes to
> stats_pending to be guarded by stats_lock. So now all accesses to
> stats_pending and stats_couner are guarded by stats_lock.
>
Neal, thanks for looking into this.

There's another race, where two flows
stats_update() and stats_start() may send two queries in parallel
which will cause FW to assert.

I'm preparing the set for net, which includes the fix  for this race
also(will be out in a couple of hours)

I will be more than happy if you will take a look.

Thanks
Dmitry


> Signed-off-by: Neal Cardwell <ncardwell@google.com>
> Cc: Eilon Greenstein <eilong@broadcom.com>
> Cc: Vladislav Zolotarov <vladz@broadcom.com>
> Cc: Yaniv Rosner <yanivr@broadcom.com>
> Cc: Merav Sicron <meravs@broadcom.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Tom Herbert <therbert@google.com>
> Cc: Havard Skinnemoen <hskinnemoen@google.com>
> Cc: Sanjay Hortikar <horti@google.com>
> Change-Id: Ic389c4355ef4a3e01da8f1f4c2019e75dbb5ddf6
> ---
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 47 +++++++++++++----------
>  1 file changed, 27 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
> index 98366ab..d72a630 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
> @@ -123,16 +123,11 @@ static void bnx2x_dp_stats(struct bnx2x *bp)
>   */
>  static void bnx2x_storm_stats_post(struct bnx2x *bp)
>  {
> +       spin_lock_bh(&bp->stats_lock);
> +
>         if (!bp->stats_pending) {
>                 int rc;
>
> -               spin_lock_bh(&bp->stats_lock);
> -
> -               if (bp->stats_pending) {
> -                       spin_unlock_bh(&bp->stats_lock);
> -                       return;
> -               }
> -
>                 bp->fw_stats_req->hdr.drv_stats_counter =
>                         cpu_to_le16(bp->stats_counter++);
>
> @@ -151,8 +146,9 @@ static void bnx2x_storm_stats_post(struct bnx2x *bp)
>                 if (rc == 0)
>                         bp->stats_pending = 1;
>
> -               spin_unlock_bh(&bp->stats_lock);
>         }
> +
> +       spin_unlock_bh(&bp->stats_lock);
>  }
>
>  static void bnx2x_hw_stats_post(struct bnx2x *bp)
> @@ -888,9 +884,7 @@ static int bnx2x_storm_stats_validate_counters(struct bnx2x *bp)
>         /* Make sure we use the value of the counter
>          * used for sending the last stats ramrod.
>          */
> -       spin_lock_bh(&bp->stats_lock);
>         cur_stats_counter = bp->stats_counter - 1;
> -       spin_unlock_bh(&bp->stats_lock);
>
>         /* are storm stats valid? */
>         if (le16_to_cpu(counters->xstats_counter) != cur_stats_counter) {
> @@ -923,7 +917,8 @@ static int bnx2x_storm_stats_validate_counters(struct bnx2x *bp)
>         return 0;
>  }
>
> -static int bnx2x_storm_stats_update(struct bnx2x *bp)
> +static int bnx2x_storm_stats_update(struct bnx2x *bp,
> +                                   bool track_pending)
>  {
>         struct tstorm_per_port_stats *tport =
>                                 &bp->fw_stats_data->port.tstorm_port_statistics;
> @@ -934,9 +929,24 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
>         struct bnx2x_eth_stats_old *estats_old = &bp->eth_stats_old;
>         int i;
>
> +       spin_lock_bh(&bp->stats_lock);
> +
>         /* vfs stat counter is managed by pf */
> -       if (IS_PF(bp) && bnx2x_storm_stats_validate_counters(bp))
> +       if (IS_PF(bp) && bnx2x_storm_stats_validate_counters(bp)) {
> +               bool pending_timeout = false;
> +
> +               if (track_pending && (bp->stats_pending++ == 3))
> +                       pending_timeout = true;
> +
> +               spin_unlock_bh(&bp->stats_lock);
> +
> +               if (pending_timeout) {
> +                       BNX2X_ERR("storm stats were not updated for 3 times\n");
> +                       bnx2x_panic();
> +               }
> +
>                 return -EAGAIN;
> +       }
>
>         estats->error_bytes_received_hi = 0;
>         estats->error_bytes_received_lo = 0;
> @@ -1118,6 +1128,8 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
>
>         bp->stats_pending = 0;
>
> +       spin_unlock_bh(&bp->stats_lock);
> +
>         return 0;
>  }
>
> @@ -1237,18 +1249,13 @@ static void bnx2x_stats_update(struct bnx2x *bp)
>                 if (bp->port.pmf)
>                         bnx2x_hw_stats_update(bp);
>
> -               if (bnx2x_storm_stats_update(bp)) {
> -                       if (bp->stats_pending++ == 3) {
> -                               BNX2X_ERR("storm stats were not updated for 3 times\n");
> -                               bnx2x_panic();
> -                       }
> +               if (bnx2x_storm_stats_update(bp, true))
>                         return;
> -               }
>         } else {
>                 /* vf doesn't collect HW statistics, and doesn't get completions
>                  * perform only update
>                  */
> -               bnx2x_storm_stats_update(bp);
> +               bnx2x_storm_stats_update(bp, false);
>         }
>
>         bnx2x_net_stats_update(bp);
> @@ -1337,7 +1344,7 @@ static void bnx2x_stats_stop(struct bnx2x *bp)
>         if (bp->port.pmf)
>                 update = (bnx2x_hw_stats_update(bp) == 0);
>
> -       update |= (bnx2x_storm_stats_update(bp) == 0);
> +       update |= (bnx2x_storm_stats_update(bp, false) == 0);
>
>         if (update) {
>                 bnx2x_net_stats_update(bp);
> --
> 1.8.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html




-- 
Best regards,
Dmitry

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net] bnx2x: guard stats_pending with stats_lock in all cases
  2013-08-12 20:52 ` Dmitry Kravkov
@ 2013-08-13 17:57   ` Neal Cardwell
  0 siblings, 0 replies; 3+ messages in thread
From: Neal Cardwell @ 2013-08-13 17:57 UTC (permalink / raw)
  To: Dmitry Kravkov
  Cc: David Miller, netdev, Eilon Greenstein, Vladislav Zolotarov,
	Yaniv Rosner, Merav Sicron, Eric Dumazet, Tom Herbert,
	Havard Skinnemoen, Sanjay Hortikar, Dmitry Kravkov

On Mon, Aug 12, 2013 at 4:52 PM, Dmitry Kravkov <dkravkov@gmail.com> wrote:
> Neal, thanks for looking into this.
>
> There's another race, where two flows
> stats_update() and stats_start() may send two queries in parallel
> which will cause FW to assert.
>
> I'm preparing the set for net, which includes the fix  for this race
> also(will be out in a couple of hours)
>
> I will be more than happy if you will take a look.
>
> Thanks
> Dmitry

Thanks! Your "bnx2x: protect different statistics flows" patch looks
like a superset of the synchronization I was proposing in this patch,
so we can drop mine.

Thanks,
neal

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-08-13 17:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-12 19:08 [PATCH net] bnx2x: guard stats_pending with stats_lock in all cases Neal Cardwell
2013-08-12 20:52 ` Dmitry Kravkov
2013-08-13 17:57   ` Neal Cardwell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.