All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net] bnx2x: Fix statistics locking scheme
@ 2015-03-18 14:14 Yuval Mintz
  2015-03-18 19:10 ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Yuval Mintz @ 2015-03-18 14:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Yuval Mintz, Ariel Elior

Statistics' state-machine in bnx2x driver must be synced with various driver
flows, but its current locking scheme manages to be wasteful [using 2 locks +
additional local variable] and prone to race-conditions at the same time,
as the state-machine and 'action' are being accessed under different locks.

This patch cleans up said logic, leaving us with a single lock for the entire
flow and removing the possible races.

Changes from v1:
	Failure to acquire lock fails flow instead of printing a warning and
	allowing access to the critical section.

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: Ariel Elior <Ariel.Elior@qlogic.com>
---
Hi Dave,

Please consider applying this to 'net'.

Thanks,
Yuval
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |   4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |  29 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |   4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 164 ++++++++++------------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h |   6 +-
 5 files changed, 101 insertions(+), 106 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 756053c..6a69dbd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1811,7 +1811,7 @@ struct bnx2x {
 	int			stats_state;
 
 	/* used for synchronization of concurrent threads statistics handling */
-	spinlock_t		stats_lock;
+	struct semaphore	stats_lock;
 
 	/* used by dmae command loader */
 	struct dmae_command	stats_dmae;
@@ -1935,8 +1935,6 @@ struct bnx2x {
 
 	int fp_array_size;
 	u32 dump_preset_idx;
-	bool					stats_started;
-	struct semaphore			stats_sema;
 
 	u8					phys_port_id[ETH_ALEN];
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index bef750a..8267981 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12038,8 +12038,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 	mutex_init(&bp->fw_mb_mutex);
 	mutex_init(&bp->drv_info_mutex);
 	bp->drv_info_mng_owner = false;
-	spin_lock_init(&bp->stats_lock);
-	sema_init(&bp->stats_sema, 1);
+	sema_init(&bp->stats_lock, 1);
 
 	INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task);
 	INIT_DELAYED_WORK(&bp->sp_rtnl_task, bnx2x_sp_rtnl_task);
@@ -13668,9 +13667,13 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
 	cancel_delayed_work_sync(&bp->sp_task);
 	cancel_delayed_work_sync(&bp->period_task);
 
-	spin_lock_bh(&bp->stats_lock);
+	 if (down_timeout(&bp->stats_lock, HZ / 10)) {
+		BNX2X_ERR("Unable to acquire stats lock\n");
+		return -EBUSY;
+	}
+
 	bp->stats_state = STATS_STATE_DISABLED;
-	spin_unlock_bh(&bp->stats_lock);
+	up(&bp->stats_lock);
 
 	bnx2x_save_statistics(bp);
 
@@ -13690,6 +13693,7 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
 static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev,
 						pci_channel_state_t state)
 {
+	enum pci_ers_result rc = PCI_ERS_RESULT_NEED_RESET;
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct bnx2x *bp = netdev_priv(dev);
 
@@ -13700,21 +13704,24 @@ static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev,
 	netif_device_detach(dev);
 
 	if (state == pci_channel_io_perm_failure) {
-		rtnl_unlock();
-		return PCI_ERS_RESULT_DISCONNECT;
+		rc = PCI_ERS_RESULT_DISCONNECT;
+		goto out;
 	}
 
-	if (netif_running(dev))
-		bnx2x_eeh_nic_unload(bp);
+	if (netif_running(dev)) {
+		if (bnx2x_eeh_nic_unload(bp)) {
+			rc = PCI_ERS_RESULT_DISCONNECT;
+			goto out;
+		}
+	}
 
 	bnx2x_prev_path_mark_eeh(bp);
 
 	pci_disable_device(pdev);
 
+out:
 	rtnl_unlock();
-
-	/* Request a slot reset */
-	return PCI_ERS_RESULT_NEED_RESET;
+	return rc;
 }
 
 /**
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index e5aca2d..cfe3c769 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2238,7 +2238,9 @@ int bnx2x_vf_close(struct bnx2x *bp, struct bnx2x_virtf *vf)
 
 		cookie.vf = vf;
 		cookie.state = VF_ACQUIRED;
-		bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie);
+		rc = bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie);
+		if (rc)
+			goto op_err;
 	}
 
 	DP(BNX2X_MSG_IOV, "set state to acquired\n");
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
index d160829..5fca527 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
@@ -123,36 +123,28 @@ static void bnx2x_dp_stats(struct bnx2x *bp)
  */
 static void bnx2x_storm_stats_post(struct bnx2x *bp)
 {
-	if (!bp->stats_pending) {
-		int rc;
+	int rc;
 
-		spin_lock_bh(&bp->stats_lock);
-
-		if (bp->stats_pending) {
-			spin_unlock_bh(&bp->stats_lock);
-			return;
-		}
-
-		bp->fw_stats_req->hdr.drv_stats_counter =
-			cpu_to_le16(bp->stats_counter++);
+	if (bp->stats_pending)
+		return;
 
-		DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n",
-		   le16_to_cpu(bp->fw_stats_req->hdr.drv_stats_counter));
+	bp->fw_stats_req->hdr.drv_stats_counter =
+		cpu_to_le16(bp->stats_counter++);
 
-		/* adjust the ramrod to include VF queues statistics */
-		bnx2x_iov_adjust_stats_req(bp);
-		bnx2x_dp_stats(bp);
+	DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n",
+	   le16_to_cpu(bp->fw_stats_req->hdr.drv_stats_counter));
 
-		/* send FW stats ramrod */
-		rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
-				   U64_HI(bp->fw_stats_req_mapping),
-				   U64_LO(bp->fw_stats_req_mapping),
-				   NONE_CONNECTION_TYPE);
-		if (rc == 0)
-			bp->stats_pending = 1;
+	/* adjust the ramrod to include VF queues statistics */
+	bnx2x_iov_adjust_stats_req(bp);
+	bnx2x_dp_stats(bp);
 
-		spin_unlock_bh(&bp->stats_lock);
-	}
+	/* send FW stats ramrod */
+	rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
+			   U64_HI(bp->fw_stats_req_mapping),
+			   U64_LO(bp->fw_stats_req_mapping),
+			   NONE_CONNECTION_TYPE);
+	if (rc == 0)
+		bp->stats_pending = 1;
 }
 
 static void bnx2x_hw_stats_post(struct bnx2x *bp)
@@ -221,7 +213,7 @@ static void bnx2x_stats_comp(struct bnx2x *bp)
  */
 
 /* should be called under stats_sema */
-static void __bnx2x_stats_pmf_update(struct bnx2x *bp)
+static void bnx2x_stats_pmf_update(struct bnx2x *bp)
 {
 	struct dmae_command *dmae;
 	u32 opcode;
@@ -519,7 +511,7 @@ static void bnx2x_func_stats_init(struct bnx2x *bp)
 }
 
 /* should be called under stats_sema */
-static void __bnx2x_stats_start(struct bnx2x *bp)
+static void bnx2x_stats_start(struct bnx2x *bp)
 {
 	if (IS_PF(bp)) {
 		if (bp->port.pmf)
@@ -531,34 +523,13 @@ static void __bnx2x_stats_start(struct bnx2x *bp)
 		bnx2x_hw_stats_post(bp);
 		bnx2x_storm_stats_post(bp);
 	}
-
-	bp->stats_started = true;
-}
-
-static void bnx2x_stats_start(struct bnx2x *bp)
-{
-	if (down_timeout(&bp->stats_sema, HZ/10))
-		BNX2X_ERR("Unable to acquire stats lock\n");
-	__bnx2x_stats_start(bp);
-	up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_pmf_start(struct bnx2x *bp)
 {
-	if (down_timeout(&bp->stats_sema, HZ/10))
-		BNX2X_ERR("Unable to acquire stats lock\n");
 	bnx2x_stats_comp(bp);
-	__bnx2x_stats_pmf_update(bp);
-	__bnx2x_stats_start(bp);
-	up(&bp->stats_sema);
-}
-
-static void bnx2x_stats_pmf_update(struct bnx2x *bp)
-{
-	if (down_timeout(&bp->stats_sema, HZ/10))
-		BNX2X_ERR("Unable to acquire stats lock\n");
-	__bnx2x_stats_pmf_update(bp);
-	up(&bp->stats_sema);
+	bnx2x_stats_pmf_update(bp);
+	bnx2x_stats_start(bp);
 }
 
 static void bnx2x_stats_restart(struct bnx2x *bp)
@@ -568,11 +539,9 @@ static void bnx2x_stats_restart(struct bnx2x *bp)
 	 */
 	if (IS_VF(bp))
 		return;
-	if (down_timeout(&bp->stats_sema, HZ/10))
-		BNX2X_ERR("Unable to acquire stats lock\n");
+
 	bnx2x_stats_comp(bp);
-	__bnx2x_stats_start(bp);
-	up(&bp->stats_sema);
+	bnx2x_stats_start(bp);
 }
 
 static void bnx2x_bmac_stats_update(struct bnx2x *bp)
@@ -1246,18 +1215,12 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 {
 	u32 *stats_comp = bnx2x_sp(bp, stats_comp);
 
-	/* we run update from timer context, so give up
-	 * if somebody is in the middle of transition
-	 */
-	if (down_trylock(&bp->stats_sema))
+	if (bnx2x_edebug_stats_stopped(bp))
 		return;
 
-	if (bnx2x_edebug_stats_stopped(bp) || !bp->stats_started)
-		goto out;
-
 	if (IS_PF(bp)) {
 		if (*stats_comp != DMAE_COMP_VAL)
-			goto out;
+			return;
 
 		if (bp->port.pmf)
 			bnx2x_hw_stats_update(bp);
@@ -1267,7 +1230,7 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 				BNX2X_ERR("storm stats were not updated for 3 times\n");
 				bnx2x_panic();
 			}
-			goto out;
+			return;
 		}
 	} else {
 		/* vf doesn't collect HW statistics, and doesn't get completions
@@ -1281,7 +1244,7 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 
 	/* vf is done */
 	if (IS_VF(bp))
-		goto out;
+		return;
 
 	if (netif_msg_timer(bp)) {
 		struct bnx2x_eth_stats *estats = &bp->eth_stats;
@@ -1292,9 +1255,6 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 
 	bnx2x_hw_stats_post(bp);
 	bnx2x_storm_stats_post(bp);
-
-out:
-	up(&bp->stats_sema);
 }
 
 static void bnx2x_port_stats_stop(struct bnx2x *bp)
@@ -1358,12 +1318,7 @@ static void bnx2x_port_stats_stop(struct bnx2x *bp)
 
 static void bnx2x_stats_stop(struct bnx2x *bp)
 {
-	int update = 0;
-
-	if (down_timeout(&bp->stats_sema, HZ/10))
-		BNX2X_ERR("Unable to acquire stats lock\n");
-
-	bp->stats_started = false;
+	bool update = false;
 
 	bnx2x_stats_comp(bp);
 
@@ -1381,8 +1336,6 @@ static void bnx2x_stats_stop(struct bnx2x *bp)
 		bnx2x_hw_stats_post(bp);
 		bnx2x_stats_comp(bp);
 	}
-
-	up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_do_nothing(struct bnx2x *bp)
@@ -1410,18 +1363,30 @@ static const struct {
 
 void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
 {
-	enum bnx2x_stats_state state;
-	void (*action)(struct bnx2x *bp);
+	enum bnx2x_stats_state state = bp->stats_state;
+
 	if (unlikely(bp->panic))
 		return;
 
-	spin_lock_bh(&bp->stats_lock);
-	state = bp->stats_state;
+	/* Statistics update run from timer context, and we don't want to stop
+	 * that context in case someone is in the middle of a transition.
+	 * For other events, wait a bit until lock is taken.
+	 */
+	if (down_trylock(&bp->stats_lock)) {
+		if (event == STATS_EVENT_UPDATE)
+			return;
+
+		if (down_timeout(&bp->stats_lock, HZ / 10)) {
+			BNX2X_ERR("Unable to acquire stats lock [Current state %d, event %d]\n",
+				  state, event);
+			return;
+		}
+	}
+
+	bnx2x_stats_stm[state][event].action(bp);
 	bp->stats_state = bnx2x_stats_stm[state][event].next_state;
-	action = bnx2x_stats_stm[state][event].action;
-	spin_unlock_bh(&bp->stats_lock);
 
-	action(bp);
+	up(&bp->stats_lock);
 
 	if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
 		DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
@@ -1998,13 +1963,36 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats,
 	}
 }
 
-void bnx2x_stats_safe_exec(struct bnx2x *bp,
-			   void (func_to_exec)(void *cookie),
-			   void *cookie){
-	if (down_timeout(&bp->stats_sema, HZ/10))
+int bnx2x_stats_safe_exec(struct bnx2x *bp,
+			  void (func_to_exec)(void *cookie),
+			  void *cookie)
+{
+	int cnt = 10;
+
+	if (down_timeout(&bp->stats_lock, HZ / 10)) {
 		BNX2X_ERR("Unable to acquire stats lock\n");
+		return -EBUSY;
+	}
+
+	/* Wait for statistics to end, then run supplied function 'safely' */
 	bnx2x_stats_comp(bp);
+	while (bp->stats_pending && cnt) {
+		if (bnx2x_storm_stats_update(bp)) {
+			usleep_range(1000, 2000);
+			cnt--;
+		}
+	}
+	if (bp->stats_pending) {
+		BNX2X_ERR("Failed to wait for stats pending to clear\n");
+		return -EBUSY;
+	}
+
 	func_to_exec(cookie);
-	__bnx2x_stats_start(bp);
-	up(&bp->stats_sema);
+
+	/* No need to restart statistics - if they're enabled, the timers
+	 * will restart the statistics.
+	 */
+	up(&bp->stats_lock);
+
+	return 0;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
index 2beceae..965539a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
@@ -539,9 +539,9 @@ struct bnx2x;
 void bnx2x_memset_stats(struct bnx2x *bp);
 void bnx2x_stats_init(struct bnx2x *bp);
 void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event);
-void bnx2x_stats_safe_exec(struct bnx2x *bp,
-			   void (func_to_exec)(void *cookie),
-			   void *cookie);
+int bnx2x_stats_safe_exec(struct bnx2x *bp,
+			  void (func_to_exec)(void *cookie),
+			  void *cookie);
 
 /**
  * bnx2x_save_statistics - save statistics when unloading.
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 net] bnx2x: Fix statistics locking scheme
  2015-03-18 14:14 [PATCH v2 net] bnx2x: Fix statistics locking scheme Yuval Mintz
@ 2015-03-18 19:10 ` David Miller
  2015-03-19  7:26   ` Yuval Mintz
  0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2015-03-18 19:10 UTC (permalink / raw)
  To: Yuval.Mintz; +Cc: netdev, Ariel.Elior

From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Wed, 18 Mar 2015 16:14:02 +0200

> +int bnx2x_stats_safe_exec(struct bnx2x *bp,
> +			  void (func_to_exec)(void *cookie),
> +			  void *cookie)
 ...
> +	if (bp->stats_pending) {
> +		BNX2X_ERR("Failed to wait for stats pending to clear\n");
> +		return -EBUSY;

Buggy, this returns with the stats_lock still held.

In my humble opinion, you are putting very little care and effort into
this bug fix.

Also I disagree with your timeout logic.

I suspect that the real reason these timeouts are present, is that
the locking hierarchy is uncertain.

Nothing really should create the situation those trylocks seem to
be dealing with.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v2 net] bnx2x: Fix statistics locking scheme
  2015-03-18 19:10 ` David Miller
@ 2015-03-19  7:26   ` Yuval Mintz
  2015-03-19 16:23     ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Yuval Mintz @ 2015-03-19  7:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior

> > +int bnx2x_stats_safe_exec(struct bnx2x *bp,
> > +			  void (func_to_exec)(void *cookie),
> > +			  void *cookie)
>  ...
> > +	if (bp->stats_pending) {
> > +		BNX2X_ERR("Failed to wait for stats pending to clear\n");
> > +		return -EBUSY;
> 
> Buggy, this returns with the stats_lock still held.

Ouch; You're right - I'll fix it.

> 
> In my humble opinion, you are putting very little care and effort into this bug fix.

Well, we've put quite a bit in the logic behind it; But you're probably right
About the effort put into v2.

> Also I disagree with your timeout logic.
> 
> I suspect that the real reason these timeouts are present, is that the locking
> hierarchy is uncertain.
> 
> Nothing really should create the situation those trylocks seem to be dealing
> with.

Actually, you're probably right in the assumption - I have a feeling that due to
the races in the previous implementation, there was lack of confidence in the
general process.
According to my current analysis there shouldn't be any real need for the limit
on time, since there are no limitless loops taking place while the locks are being
held [at most, HW/FW is asserted and nothing happens; But it still should take
only a couple of mili-seconds].
The only reason I've left this was to leave the implementation close as I can to
original [which used that locking mechanism] given that it's intended to `net'.
Do you prefer I simply switch over into using a regular mutex?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 net] bnx2x: Fix statistics locking scheme
  2015-03-19  7:26   ` Yuval Mintz
@ 2015-03-19 16:23     ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2015-03-19 16:23 UTC (permalink / raw)
  To: Yuval.Mintz; +Cc: netdev, Ariel.Elior

From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Thu, 19 Mar 2015 07:26:25 +0000

> Do you prefer I simply switch over into using a regular mutex?

Yes, I do, unless you can prove that it is insufficient.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-03-19 16:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-18 14:14 [PATCH v2 net] bnx2x: Fix statistics locking scheme Yuval Mintz
2015-03-18 19:10 ` David Miller
2015-03-19  7:26   ` Yuval Mintz
2015-03-19 16:23     ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.