All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net: fix race between napi kthread mode and busy poll
@ 2021-02-23 23:41 Wei Wang
  2021-02-24 19:48 ` Jakub Kicinski
  0 siblings, 1 reply; 34+ messages in thread
From: Wei Wang @ 2021-02-23 23:41 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, netdev
  Cc: Eric Dumazet, Paolo Abeni, Hannes Frederic Sowa, Alexander Duyck,
	Martin Zaharinov

Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
determine if the kthread owns this napi and could call napi->poll() on
it. However, if socket busy poll is enabled, it is possible that the
busy poll thread grabs this SCHED bit (after the previous napi->poll()
invokes napi_complete_done() and clears SCHED bit) and tries to poll
on the same napi.
This patch tries to fix this race by adding a new bit
NAPI_STATE_SCHED_BUSY_POLL in napi->state. This bit gets set in
napi_busy_loop() togther with NAPI_STATE_SCHED, and gets cleared in
napi_complete_done() together with NAPI_STATE_SCHED. This helps
distinguish the ownership of the napi between kthread and the busy poll
thread, and prevents the kthread from polling on the napi when this napi
is still owned by the busy poll thread.

Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
Reported-by: Martin Zaharinov <micron10@gmail.com>
Suggested-by: Alexander Duyck <alexanderduyck@fb.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.come>
Signed-off-by: Wei Wang <weiwan@google.com>
---
 include/linux/netdevice.h |  4 +++-
 net/core/dev.c            | 10 ++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddf4cfc12615..9ed0f89ccdd5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -357,9 +357,10 @@ enum {
 	NAPI_STATE_NPSVC,		/* Netpoll - don't dequeue from poll_list */
 	NAPI_STATE_LISTED,		/* NAPI added to system lists */
 	NAPI_STATE_NO_BUSY_POLL,	/* Do not add in napi_hash, no busy polling */
-	NAPI_STATE_IN_BUSY_POLL,	/* sk_busy_loop() owns this NAPI */
+	NAPI_STATE_IN_BUSY_POLL,	/* sk_busy_loop() grabs SHED bit and could busy poll */
 	NAPI_STATE_PREFER_BUSY_POLL,	/* prefer busy-polling over softirq processing*/
 	NAPI_STATE_THREADED,		/* The poll is performed inside its own thread*/
+	NAPI_STATE_SCHED_BUSY_POLL,	/* Napi is currently scheduled in busy poll mode */
 };
 
 enum {
@@ -372,6 +373,7 @@ enum {
 	NAPIF_STATE_IN_BUSY_POLL	= BIT(NAPI_STATE_IN_BUSY_POLL),
 	NAPIF_STATE_PREFER_BUSY_POLL	= BIT(NAPI_STATE_PREFER_BUSY_POLL),
 	NAPIF_STATE_THREADED		= BIT(NAPI_STATE_THREADED),
+	NAPIF_STATE_SCHED_BUSY_POLL	= BIT(NAPI_STATE_SCHED_BUSY_POLL),
 };
 
 enum gro_result {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c5967e80132..ec1a30d95d8b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6486,6 +6486,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 		WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
 
 		new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+			      NAPIF_STATE_SCHED_BUSY_POLL |
 			      NAPIF_STATE_PREFER_BUSY_POLL);
 
 		/* If STATE_MISSED was set, leave STATE_SCHED set,
@@ -6525,6 +6526,7 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 
 static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 {
+	clear_bit(NAPI_STATE_SCHED_BUSY_POLL, &napi->state);
 	if (!skip_schedule) {
 		gro_normal_list(napi);
 		__napi_schedule(napi);
@@ -6624,7 +6626,8 @@ void napi_busy_loop(unsigned int napi_id,
 			}
 			if (cmpxchg(&napi->state, val,
 				    val | NAPIF_STATE_IN_BUSY_POLL |
-					  NAPIF_STATE_SCHED) != val) {
+					  NAPIF_STATE_SCHED |
+					  NAPIF_STATE_SCHED_BUSY_POLL) != val) {
 				if (prefer_busy_poll)
 					set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 				goto count;
@@ -6971,7 +6974,10 @@ static int napi_thread_wait(struct napi_struct *napi)
 	set_current_state(TASK_INTERRUPTIBLE);
 
 	while (!kthread_should_stop() && !napi_disable_pending(napi)) {
-		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+		unsigned long val = READ_ONCE(napi->state);
+
+		if (val & NAPIF_STATE_SCHED &&
+		    !(val & NAPIF_STATE_SCHED_BUSY_POLL)) {
 			WARN_ON(!list_empty(&napi->poll_list));
 			__set_current_state(TASK_RUNNING);
 			return 0;
-- 
2.30.0.617.g56c4b15f3c-goog


^ permalink raw reply related	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2021-02-26 23:11 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-23 23:41 [PATCH net] net: fix race between napi kthread mode and busy poll Wei Wang
2021-02-24 19:48 ` Jakub Kicinski
2021-02-24 20:37   ` Eric Dumazet
2021-02-24 21:30     ` Jakub Kicinski
2021-02-24 22:29       ` Wei Wang
2021-02-24 23:29         ` Jakub Kicinski
     [not found]       ` <CANn89i+xGsMpRfPwZK281jyfum_1fhTNFXq7Z8HOww9H1BHmiw@mail.gmail.com>
2021-02-24 23:52         ` Jakub Kicinski
2021-02-24 23:59           ` Eric Dumazet
2021-02-25  0:07             ` Jakub Kicinski
2021-02-25  0:11               ` Alexander Duyck
2021-02-25  0:16                 ` Wei Wang
2021-02-25  0:32                   ` Jakub Kicinski
2021-02-25  0:44                     ` Wei Wang
2021-02-25  0:49                       ` Jakub Kicinski
2021-02-25  1:06                         ` Wei Wang
2021-02-25  1:40                           ` Jakub Kicinski
2021-02-25  2:16                             ` Wei Wang
2021-02-25  0:20                 ` Jakub Kicinski
2021-02-25  1:22                   ` Alexander Duyck
2021-02-25  2:03                     ` Jakub Kicinski
2021-02-25  2:31                       ` Wei Wang
2021-02-25  5:52                         ` Martin Zaharinov
2021-02-25  8:21                         ` Jakub Kicinski
2021-02-25 18:29                           ` Wei Wang
2021-02-25 23:00                             ` Jakub Kicinski
2021-02-26  0:16                               ` Wei Wang
2021-02-26  1:18                                 ` Jakub Kicinski
2021-02-26  1:49                                   ` Wei Wang
2021-02-26  3:52                                   ` Alexander Duyck
2021-02-26 18:28                                     ` Wei Wang
2021-02-26 21:35                                       ` Jakub Kicinski
2021-02-26 22:24                                         ` Wei Wang
     [not found]                                           ` <CALidq=UWupwXMMYAMMF2GW4ifR0WQJos6VqXPuzQ0_seHGUHdA@mail.gmail.com>
2021-02-26 22:37                                             ` Wei Wang
2021-02-26 23:10                                           ` Jakub Kicinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.