* [PATCH net] bnx2x: Fix netpoll interoperability
From: Yuval Mintz @ 2015-04-14 13:35 UTC (permalink / raw)
  To: davem, netdev; +Cc: bind, peter, Yuval Mintz, Ariel Elior

Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload") switched
the napi/busy_lock locking mechanism from spin_lock() into spin_lock_bh(),
breaking inter-operability with netconsole, as netpoll disables interrupts
prior to calling our napi mechanism.
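
Roughly, the sequence that breaks is the following (an illustrative sketch of
the call flow, not the exact netpoll call chain; fp->lock is the pre-patch
spinlock):

    local_irq_save(flags);              /* netpoll disables hard IRQs       */
      ...
      bnx2x_poll()                      /* netpoll invokes our ->poll()     */
        bnx2x_fp_lock_napi(fp)
          spin_lock_bh(&fp->lock);
        ...
        spin_unlock_bh(&fp->lock);      /* re-enabling BHs while hard IRQs
                                         * are still off is what trips here */
    local_irq_restore(flags);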

This switches the driver into using atomic assignments instead of the spinlock
mechanisms previously employed.

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: Ariel Elior <Ariel.Elior@qlogic.com>
---
Hi Dave,

Please consider applying this to 'net'.

Thanks,
Yuval
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     | 116 +++++++-----------------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   5 +-
 2 files changed, 34 insertions(+), 87 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 4085c4b..d8b87a7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -525,26 +525,23 @@ enum bnx2x_tpa_mode_t {
 	TPA_MODE_GRO
 };
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+enum bnx2x_fp_state {
+	BNX2X_STATE_FP_IDLE,
+	BNX2X_STATE_FP_NAPI,
+	BNX2X_STATE_FP_POLL,
+	BNX2X_STATE_FP_DISABLE,
+};
+#endif
+
 struct bnx2x_fastpath {
 	struct bnx2x		*bp; /* parent */
 
 	struct napi_struct	napi;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
-	unsigned int state;
-#define BNX2X_FP_STATE_IDLE		      0
-#define BNX2X_FP_STATE_NAPI		(1 << 0)    /* NAPI owns this FP */
-#define BNX2X_FP_STATE_POLL		(1 << 1)    /* poll owns this FP */
-#define BNX2X_FP_STATE_DISABLED		(1 << 2)
-#define BNX2X_FP_STATE_NAPI_YIELD	(1 << 3)    /* NAPI yielded this FP */
-#define BNX2X_FP_STATE_POLL_YIELD	(1 << 4)    /* poll yielded this FP */
-#define BNX2X_FP_OWNED	(BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL)
-#define BNX2X_FP_YIELD	(BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD)
-#define BNX2X_FP_LOCKED	(BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED)
-#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
-	/* protect state */
-	spinlock_t lock;
-#endif /* CONFIG_NET_RX_BUSY_POLL */
+	atomic_t state;
+#endif
 
 	union host_hc_status_block	status_blk;
 	/* chip independent shortcuts into sb structure */
@@ -621,99 +618,50 @@ struct bnx2x_fastpath {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
 {
-	spin_lock_init(&fp->lock);
-	fp->state = BNX2X_FP_STATE_IDLE;
+	atomic_set(&fp->state, BNX2X_STATE_FP_IDLE);
 }
 
 /* called from the device poll routine to get ownership of a FP */
 static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
 {
-	bool rc = true;
-
-	spin_lock_bh(&fp->lock);
-	if (fp->state & BNX2X_FP_LOCKED) {
-		WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
-		fp->state |= BNX2X_FP_STATE_NAPI_YIELD;
-		rc = false;
-	} else {
-		/* we don't care if someone yielded */
-		fp->state = BNX2X_FP_STATE_NAPI;
-	}
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	int rc = atomic_cmpxchg(&fp->state, BNX2X_STATE_FP_IDLE,
+				BNX2X_STATE_FP_NAPI);
+	return rc == BNX2X_STATE_FP_IDLE;
 }
 
-/* returns true is someone tried to get the FP while napi had it */
-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
 {
-	bool rc = false;
-
-	spin_lock_bh(&fp->lock);
-	WARN_ON(fp->state &
-		(BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD));
-
-	if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
-		rc = true;
-
-	/* state ==> idle, unless currently disabled */
-	fp->state &= BNX2X_FP_STATE_DISABLED;
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	WARN_ON(atomic_read(&fp->state) != BNX2X_STATE_FP_NAPI);
+	atomic_set(&fp->state, BNX2X_STATE_FP_IDLE);
 }
 
 /* called from bnx2x_low_latency_poll() */
 static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
 {
-	bool rc = true;
-
-	spin_lock_bh(&fp->lock);
-	if ((fp->state & BNX2X_FP_LOCKED)) {
-		fp->state |= BNX2X_FP_STATE_POLL_YIELD;
-		rc = false;
-	} else {
-		/* preserve yield marks */
-		fp->state |= BNX2X_FP_STATE_POLL;
-	}
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	int rc = atomic_cmpxchg(&fp->state, BNX2X_STATE_FP_IDLE,
+				BNX2X_STATE_FP_POLL);
+	return rc == BNX2X_STATE_FP_IDLE;
 }
 
-/* returns true if someone tried to get the FP while it was locked */
-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
 {
-	bool rc = false;
-
-	spin_lock_bh(&fp->lock);
-	WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
-
-	if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
-		rc = true;
-
-	/* state ==> idle, unless currently disabled */
-	fp->state &= BNX2X_FP_STATE_DISABLED;
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	WARN_ON(atomic_read(&fp->state) != BNX2X_STATE_FP_POLL);
+	atomic_set(&fp->state, BNX2X_STATE_FP_IDLE);
 }
 
-/* true if a socket is polling, even if it did not get the lock */
+/* true if a socket is polling */
 static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
 {
-	WARN_ON(!(fp->state & BNX2X_FP_OWNED));
-	return fp->state & BNX2X_FP_USER_PEND;
+	return atomic_read(&fp->state) == BNX2X_STATE_FP_POLL;
 }
 
 /* false if fp is currently owned */
 static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
 {
-	int rc = true;
+	int rc = atomic_cmpxchg(&fp->state, BNX2X_STATE_FP_IDLE,
+				BNX2X_STATE_FP_DISABLE);
+	return rc == BNX2X_STATE_FP_IDLE;
 
-	spin_lock_bh(&fp->lock);
-	if (fp->state & BNX2X_FP_OWNED)
-		rc = false;
-	fp->state |= BNX2X_FP_STATE_DISABLED;
-	spin_unlock_bh(&fp->lock);
-
-	return rc;
 }
 #else
 static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
@@ -725,9 +673,8 @@ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
 	return true;
 }
 
-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
 {
-	return false;
 }
 
 static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
@@ -735,9 +682,8 @@ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
 	return false;
 }
 
-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
 {
-	return false;
 }
 
 static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0a9faa1..f7c0375 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 			}
 		}
 
+		bnx2x_fp_unlock_napi(fp);
+
 		/* Fall out from the NAPI loop if needed */
-		if (!bnx2x_fp_unlock_napi(fp) &&
-		    !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+		if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
 
 			/* No need to update SB for FCoE L2 ring as long as
 			 * it's connected to the default SB and the SB
-- 
1.9.3


* Re: [PATCH net] bnx2x: Fix netpoll interoperability
From: Eric Dumazet @ 2015-04-14 14:52 UTC (permalink / raw)
  To: Yuval Mintz; +Cc: davem, netdev, bind, peter, Ariel Elior, Willem de Bruijn

On Tue, 2015-04-14 at 16:35 +0300, Yuval Mintz wrote:
> Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload") switched
> the napi/busy_lock locking mechanism from spin_lock() into spin_lock_bh(),
> breaking inter-operability with netconsole, as netpoll disables interrupts
> prior to calling our napi mechanism.
> 
> This switches the driver into using atomic assignments instead of the spinlock
> mechanisms previously employed.
> 
> Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
> Signed-off-by: Ariel Elior <Ariel.Elior@qlogic.com>
> ---

This fix is prone to starvation of softirq handler.

Since you use 2 bits, I believe you could set one bit unconditionally to
make sure the bnx2x_low_latency_recv() will yield to softirq handler.

Please CC me and Willem on busypoll stuff, thanks !


* RE: [PATCH net] bnx2x: Fix netpoll interoperability
From: Yuval Mintz @ 2015-04-14 16:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

>> Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload") switched
>> the napi/busy_lock locking mechanism from spin_lock() into spin_lock_bh(),
>> breaking inter-operability with netconsole, as netpoll disables interrupts
>> prior to calling our napi mechanism.
>>
>> This switches the driver into using atomic assignments instead of the spinlock
>> mechanisms previously employed.

> This fix is prone to starvation of softirq handler.
> Since you use 2 bits, I believe you could set one bit unconditionally to
> make sure the bnx2x_low_latency_recv() will yield to softirq handler.

Care to elaborate a bit about the possible starvation?

> Please CC me and Willem on busypoll stuff, thanks !

Sure.


* Re: [PATCH net] bnx2x: Fix netpoll interoperability
From: Eric Dumazet @ 2015-04-14 17:51 UTC (permalink / raw)
  To: Yuval Mintz
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

On Tue, 2015-04-14 at 16:44 +0000, Yuval Mintz wrote:
> >> Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload") switched
> >> the napi/busy_lock locking mechanism from spin_lock() into spin_lock_bh(),
> >> breaking inter-operability with netconsole, as netpoll disables interrupts
> >> prior to calling our napi mechanism.
> >>
> >> This switches the driver into using atomic assignments instead of the spinlock
> >> mechanisms previously employed.
> 
> > This fix is prone to starvation of softirq handler.
> > Since you use 2 bits, I believe you could set one bit unconditionally to
> > make sure the bnx2x_low_latency_recv() will yield to softirq handler.
> 
> Care to elaborate a bit? About the possibly starvation?
> 
> > Please CC me and Willem on busypoll stuff, thanks !

I believe the original busypoll yield concern was forgotten or reversed.

If you have many busypoll users, they might get the 'lock', and the poor
cpu that received the hardware interrupt and raised RX softirq for this
RX queue will spin forever [1]

busypoll users should backoff and not attempt to grab a lock (especially
if this lock is not a ticket lock anymore) if the softirq handler is
desperately trying to make progress.

[1]
softirq handler:

   while (1) {
       if (!bnx2x_fp_lock_napi())
           continue;
       ...
   }
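
The backoff could look something like this (a rough sketch only, with
illustrative names; fp->state would have to become an unsigned long so the
bitops below can be used):

    #define FP_STATE_NAPI           BIT(0)  /* NAPI owns the queue      */
    #define FP_STATE_NAPI_REQ_BIT   1       /* NAPI wants the queue     */
    #define FP_STATE_POLL           BIT(2)  /* busy_poll owns the queue */

    static inline bool fp_lock_napi(struct bnx2x_fastpath *fp)
    {
            unsigned long old = READ_ONCE(fp->state);

            if (old & FP_STATE_POLL) {
                    /* leave a mark so the busy poller backs off */
                    set_bit(FP_STATE_NAPI_REQ_BIT, &fp->state);
                    return false;
            }
            return cmpxchg(&fp->state, old, FP_STATE_NAPI) == old;
    }

    /* called from busy_poll: give up as soon as anything else is set,
     * including the NAPI_REQ mark left above
     */
    static inline bool fp_lock_poll(struct bnx2x_fastpath *fp)
    {
            return cmpxchg(&fp->state, 0, FP_STATE_POLL) == 0;
    }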

I can provide an alternative patch if you like.


* RE: [PATCH net] bnx2x: Fix netpoll interoperability
From: Yuval Mintz @ 2015-04-14 18:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

> I believe the original busypoll yield concern was forgotten or reversed.

> If you have many busypoll users, they might get the 'lock', and the poor
> cpu that received the hardware interrupt and raised RX softirq for this
> RX queue will spin forever [1]

> busypoll users should backoff and not attempt to grab a lock (especially
> if this lock is not a ticket lock anymore) if the softirq handler is
> desperately trying to make progress.

> [1]
> softirq handler

>   while (1) {
>       if (!bnx2x_fp_lock_napi())
>                continue;

bnx2x returns the budget; it doesn't loop in the poll function.
[I don't have the napi_poll logic in mind, but I do hope it doesn't iterate
repeatedly but rather only on next re-schedule]

>        }
>   }

> I can provide an alternative patch if you like.

If you're willing to make the effort, sure.


* Re: [PATCH net] bnx2x: Fix netpoll interoperability
From: Eric Dumazet @ 2015-04-14 18:24 UTC (permalink / raw)
  To: Yuval Mintz
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

On Tue, 2015-04-14 at 18:11 +0000, Yuval Mintz wrote:
> > I believe the original busypoll yield concern was forgotten or reversed.
> 
> > If you have many busypoll users, they might get the 'lock', and the poor
> > cpu that received the hardware interrupt and raised RX softirq for this
> > RX queue will spin forever [1]
> 
> > busypoll users should backoff and not attempt to grab a lock (especially
> > if this lock is not a ticket lock anymore) if the softirq handler is
> > desperately trying to make progress.
> 
> > [1]
> > softirq handler
> 
> >   while (1) {
> >       if (!bnx2x_fp_lock_napi())
> >                continue;
> 
> bnx2x returns the budget, it doesn't loop in the poll function.

Returning the budget literally asks to be called again ASAP.

> [I don't have the napi_poll logic in mind, but I do hope it doesn't iterate
> repeatedly but rather only on next re-schedule]


I can tell you it loops. Look again at the code.

To break the loop, you have to call napi_complete()
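
The ->poll() contract is roughly the following (an illustrative driver
sketch; the foo_* names are made up):

    static int foo_poll(struct napi_struct *napi, int budget)
    {
            int work_done = foo_clean_rx_tx(napi, budget);

            if (work_done < budget) {
                    napi_complete(napi);    /* off the poll list ...     */
                    foo_enable_irq(napi);   /* ... wait for the next IRQ */
            }
            /* returning 'budget' keeps us on the list, and net_rx_action()
             * will call us again as soon as it can
             */
            return work_done;
    }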



> >        }
> >   }
> 
> > I can provide an alternative patch if you like.
> 
> If you're willing to make the effort, sure.

Yes, will do that shortly.


* [PATCH v2 net] bnx2x: Fix busy_poll vs netpoll
From: Eric Dumazet @ 2015-04-15  1:45 UTC (permalink / raw)
  To: Yuval Mintz
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

From: Eric Dumazet <edumazet@google.com>

Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
switched the napi/busy_lock locking mechanism from spin_lock() into
spin_lock_bh(), breaking inter-operability with netconsole, as netpoll
disables interrupts prior to calling our napi mechanism.

This switches the driver into using atomic assignments instead of the
spinlock mechanisms previously employed.

Based on initial patch from Yuval Mintz & Ariel Elior

I basically added softirq starvation avoidance, and a mixture
of atomic operations, plain writes and barriers.

Note this slightly reduces the overhead for this driver when no
busy_poll sockets are in use.

Fixes: 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---

Yuval Mintz & Ariel Elior, please carefully review and test this patch.

David: This applies on net-next or net, and needs a stable backport once
validated.

 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     |  137 +++++---------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |    9 
 2 files changed, 56 insertions(+), 90 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 4085c4b31047..355d5fea5be9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -531,20 +531,8 @@ struct bnx2x_fastpath {
 	struct napi_struct	napi;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
-	unsigned int state;
-#define BNX2X_FP_STATE_IDLE		      0
-#define BNX2X_FP_STATE_NAPI		(1 << 0)    /* NAPI owns this FP */
-#define BNX2X_FP_STATE_POLL		(1 << 1)    /* poll owns this FP */
-#define BNX2X_FP_STATE_DISABLED		(1 << 2)
-#define BNX2X_FP_STATE_NAPI_YIELD	(1 << 3)    /* NAPI yielded this FP */
-#define BNX2X_FP_STATE_POLL_YIELD	(1 << 4)    /* poll yielded this FP */
-#define BNX2X_FP_OWNED	(BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL)
-#define BNX2X_FP_YIELD	(BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD)
-#define BNX2X_FP_LOCKED	(BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED)
-#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
-	/* protect state */
-	spinlock_t lock;
-#endif /* CONFIG_NET_RX_BUSY_POLL */
+	unsigned long		busy_poll_state;
+#endif
 
 	union host_hc_status_block	status_blk;
 	/* chip independent shortcuts into sb structure */
@@ -619,104 +607,83 @@ struct bnx2x_fastpath {
 #define bnx2x_fp_qstats(bp, fp)	(&((bp)->fp_stats[(fp)->index].eth_q_stats))
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
+
+enum bnx2x_fp_state {
+	BNX2X_STATE_FP_NAPI	= BIT(0), /* NAPI handler owns the queue */
+
+	BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
+	BNX2X_STATE_FP_NAPI_REQ = BIT(1),
+
+	BNX2X_STATE_FP_POLL_BIT = 2,
+	BNX2X_STATE_FP_POLL     = BIT(2), /* busy_poll owns the queue */
+
+	BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
+};
+
+static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
 {
-	spin_lock_init(&fp->lock);
-	fp->state = BNX2X_FP_STATE_IDLE;
+	WRITE_ONCE(fp->busy_poll_state, 0);
 }
 
 /* called from the device poll routine to get ownership of a FP */
 static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
 {
-	bool rc = true;
-
-	spin_lock_bh(&fp->lock);
-	if (fp->state & BNX2X_FP_LOCKED) {
-		WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
-		fp->state |= BNX2X_FP_STATE_NAPI_YIELD;
-		rc = false;
-	} else {
-		/* we don't care if someone yielded */
-		fp->state = BNX2X_FP_STATE_NAPI;
+	unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
+
+	while (1) {
+		switch (old) {
+		case BNX2X_STATE_FP_POLL:
+			/* make sure bnx2x_fp_lock_poll() wont starve us */
+			set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
+				&fp->busy_poll_state);
+			/* fallthrough */
+		case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
+			return false;
+		default:
+			break;
+		}
+		prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
+		if (unlikely(prev != old)) {
+			old = prev;
+			continue;
+		}
+		return true;
 	}
-	spin_unlock_bh(&fp->lock);
-	return rc;
 }
 
-/* returns true is someone tried to get the FP while napi had it */
-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
 {
-	bool rc = false;
-
-	spin_lock_bh(&fp->lock);
-	WARN_ON(fp->state &
-		(BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD));
-
-	if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
-		rc = true;
-
-	/* state ==> idle, unless currently disabled */
-	fp->state &= BNX2X_FP_STATE_DISABLED;
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	smp_wmb();
+	fp->busy_poll_state = 0;
 }
 
 /* called from bnx2x_low_latency_poll() */
 static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
 {
-	bool rc = true;
-
-	spin_lock_bh(&fp->lock);
-	if ((fp->state & BNX2X_FP_LOCKED)) {
-		fp->state |= BNX2X_FP_STATE_POLL_YIELD;
-		rc = false;
-	} else {
-		/* preserve yield marks */
-		fp->state |= BNX2X_FP_STATE_POLL;
-	}
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
 }
 
-/* returns true if someone tried to get the FP while it was locked */
-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
 {
-	bool rc = false;
-
-	spin_lock_bh(&fp->lock);
-	WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
-
-	if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
-		rc = true;
-
-	/* state ==> idle, unless currently disabled */
-	fp->state &= BNX2X_FP_STATE_DISABLED;
-	spin_unlock_bh(&fp->lock);
-	return rc;
+	smp_mb__before_atomic();
+	clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
 }
 
-/* true if a socket is polling, even if it did not get the lock */
+/* true if a socket is polling */
 static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
 {
-	WARN_ON(!(fp->state & BNX2X_FP_OWNED));
-	return fp->state & BNX2X_FP_USER_PEND;
+	return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
 }
 
 /* false if fp is currently owned */
 static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
 {
-	int rc = true;
-
-	spin_lock_bh(&fp->lock);
-	if (fp->state & BNX2X_FP_OWNED)
-		rc = false;
-	fp->state |= BNX2X_FP_STATE_DISABLED;
-	spin_unlock_bh(&fp->lock);
+	set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
+	return !bnx2x_fp_ll_polling(fp);
 
-	return rc;
 }
 #else
-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
 {
 }
 
@@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
 	return true;
 }
 
-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
 {
-	return false;
 }
 
 static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
@@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
 	return false;
 }
 
-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
 {
-	return false;
 }
 
 static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0a9faa134a9a..2f63467bce46 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp)
 	int i;
 
 	for_each_rx_queue_cnic(bp, i) {
-		bnx2x_fp_init_lock(&bp->fp[i]);
+		bnx2x_fp_busy_poll_init(&bp->fp[i]);
 		napi_enable(&bnx2x_fp(bp, i, napi));
 	}
 }
@@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp)
 	int i;
 
 	for_each_eth_queue(bp, i) {
-		bnx2x_fp_init_lock(&bp->fp[i]);
+		bnx2x_fp_busy_poll_init(&bp->fp[i]);
 		napi_enable(&bnx2x_fp(bp, i, napi));
 	}
 }
@@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 			}
 		}
 
+		bnx2x_fp_unlock_napi(fp);
+
 		/* Fall out from the NAPI loop if needed */
-		if (!bnx2x_fp_unlock_napi(fp) &&
-		    !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+		if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
 
 			/* No need to update SB for FCoE L2 ring as long as
 			 * it's connected to the default SB and the SB


* RE: [PATCH v2 net] bnx2x: Fix busy_poll vs netpoll
From: Yuval Mintz @ 2015-04-15  4:05 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

>  #ifdef CONFIG_NET_RX_BUSY_POLL
> -static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
> +
> +enum bnx2x_fp_state {
> +       BNX2X_STATE_FP_NAPI     = BIT(0), /* NAPI handler owns the queue */
> +
> +       BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
> +       BNX2X_STATE_FP_NAPI_REQ = BIT(1),
> +
> +       BNX2X_STATE_FP_POLL_BIT = 2,
> +       BNX2X_STATE_FP_POLL     = BIT(2), /* busy_poll owns the queue */
> +
> +       BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
> +};

...

> /* called from the device poll routine to get ownership of a FP */
> static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
>  {
> +       unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
> +
> +       while (1) {
> +               switch (old) {
> +               case BNX2X_STATE_FP_POLL:
> +                       /* make sure bnx2x_fp_lock_poll() wont starve us */
> +                       set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
> +                               &fp->busy_poll_state);
> +                       /* fallthrough */
> +               case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
> +                       return false;
> +               default:
> +                       break;
> +               }
> +               prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);

Wouldn't this override the disabled status? Shouldn't we return 'false'
if BNX2X_STATE_FP_DISABLE_BIT is set?

> +               if (unlikely(prev != old)) {
> +                       old = prev;
> +                       continue;
> +               }
> +               return true;
>       }
>  }

BTW, this looks quite generic - isn't it possible to take it out of the
driver and push it into the networking core, making it uniform in the process?


* Re: [PATCH v2 net] bnx2x: Fix busy_poll vs netpoll
From: Eric Dumazet @ 2015-04-15 10:05 UTC (permalink / raw)
  To: Yuval Mintz
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

On Wed, 2015-04-15 at 04:05 +0000, Yuval Mintz wrote:
> >  #ifdef CONFIG_NET_RX_BUSY_POLL
> > -static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
> > +
> > +enum bnx2x_fp_state {
> > +       BNX2X_STATE_FP_NAPI     = BIT(0), /* NAPI handler owns the queue */
> > +
> > +       BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
> > +       BNX2X_STATE_FP_NAPI_REQ = BIT(1),
> > +
> > +       BNX2X_STATE_FP_POLL_BIT = 2,
> > +       BNX2X_STATE_FP_POLL     = BIT(2), /* busy_poll owns the queue */
> > +
> > +       BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
> > +};
> 
> ...
> 
> > /* called from the device poll routine to get ownership of a FP */
> > static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
> >  {
> > +       unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
> > +
> > +       while (1) {
> > +               switch (old) {
> > +               case BNX2X_STATE_FP_POLL:
> > +                       /* make sure bnx2x_fp_lock_poll() wont starve us */
> > +                       set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
> > +                               &fp->busy_poll_state);
> > +                       /* fallthrough */
> > +               case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
> > +                       return false;
> > +               default:
> > +                       break;
> > +               }
> > +               prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
> 
> Wouldn't this override the disabled status? Shouldn't we return 'false'
> if BNX2X_STATE_FP_DISABLE_BIT is set?

My reasoning is that at this point BNX2X_STATE_FP_DISABLE_BIT cannot be
set.

BNX2X_STATE_FP_DISABLE_BIT is set only after a call to napi_disable()


	napi_disable(&bnx2x_fp(bp, i, napi)); 
	while (!bnx2x_fp_ll_disable(&bp->fp[i]))
		usleep_range(1000, 2000);


It is important to assume this; otherwise bnx2x_fp_unlock_napi() could
not be a simple write, but would need a locked operation.

Also note that napi_disable() itself sets NAPI_STATE_DISABLE
so that napi_poll() can shortcut the napi->poll() at one point
and break a potential infinite loop under rx pressure.

	set_bit(NAPI_STATE_DISABLE, &n->state);
> 
> > +               if (unlikely(prev != old)) {
> > +                       old = prev;
> > +                       continue;
> > +               }
> > +               return true;
> >       }
> >  }
> 
> BTW, this looks quite generic - isn't it possible to take it out of the
> driver and push it into the networking core, making it uniform in the process?

Yes, this is the plan I have in mind, once net-next is opened again ;)

Thanks !


* RE: [PATCH v2 net] bnx2x: Fix busy_poll vs netpoll
From: Yuval Mintz @ 2015-04-15 14:03 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

> >
> > BTW, this looks quite generic - isn't it possible to take it out of
> > the driver and push it into the networking core, making it uniform in the process?
> 
> Yes, this is the plan I have in mind, once net-next is opened again ;)
> 
> Thanks !
> 

I'm not familiar with any really strong test suite for busy poll, but I did try
running all kinds of netperf connections with various busy_{poll,read} values;
performance seemed reasonable, and I verified that netconsole works.

So for all it's worth,
Tested-by: Yuval Mintz <Yuval.Mintz@qlogic.com>


* Re: [PATCH v2 net] bnx2x: Fix busy_poll vs netpoll
From: Eric Dumazet @ 2015-04-15 17:09 UTC (permalink / raw)
  To: Yuval Mintz
  Cc: David Miller, netdev, bind, peter, Ariel Elior, Willem de Bruijn

On Wed, 2015-04-15 at 14:03 +0000, Yuval Mintz wrote:
> > >
> > > BTW, this looks quite generic - isn't it possible to take it out of
> > > the driver and push it into the networking core, making it uniform in the process?
> > 
> > Yes, this is the plan I have in mind, once net-next is opened again ;)
> > 
> > Thanks !
> > 
> 
> I'm not familiar with any real strong test-suite for the busy poll, but I did try
> running all kinds of netperf connections with various busy_{poll,read} values
> and performance seemed reasonable, and verified that netconsole manages
> to work.
> 
> So for all it's worth,
> Tested-by: Yuval Mintz <Yuval.Mintz@qlogic.com>

Thanks a lot !


* Re: [PATCH v2 net] bnx2x: Fix busy_poll vs netpoll
From: David Miller @ 2015-04-15 21:25 UTC (permalink / raw)
  To: eric.dumazet; +Cc: Yuval.Mintz, netdev, bind, peter, Ariel.Elior, willemb

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 14 Apr 2015 18:45:00 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
> switched the napi/busy_lock locking mechanism from spin_lock() into
> spin_lock_bh(), breaking inter-operability with netconsole, as netpoll
> disables interrupts prior to calling our napi mechanism.
> 
> This switches the driver into using atomic assignments instead of the
> spinlock mechanisms previously employed.
> 
> Based on initial patch from Yuval Mintz & Ariel Elior
> 
> I basically added softirq starvation avoidance, and a mixture
> of atomic operations, plain writes and barriers.
> 
> Note this slightly reduces the overhead for this driver when no
> busy_poll sockets are in use.
> 
> Fixes: 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied and queued up for -stable, thanks Eric.


