All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: Tom Herbert <therbert@google.com>, David Miller <davem@davemloft.net>
Cc: netdev <netdev@vger.kernel.org>
Subject: [RFC] rps: shortcut net_rps_action()
Date: Mon, 19 Apr 2010 11:37:02 +0200	[thread overview]
Message-ID: <1271669822.16881.7520.camel@edumazet-laptop> (raw)
In-Reply-To: <1271590476.16881.4925.camel@edumazet-laptop>

net_rps_action() is a bit expensive on NR_CPUS=64..4096 kernels, even if
RPS is not active.

I add a flag to scan cpumask only if at least one IPI was scheduled.
Even cpumask_weight() might be expensive on some setups, where
nr_cpumask_bits could be very big (4096 for example)

Move all RPS logic into net_rps_action() to cleanup net_rx_action() code
(remove two ifdefs)

Move rps_remote_softirq_cpus into softnet_data to share its first cache
line, filling an existing hole.

In a future patch, we could call net_rps_action() from process_backlog()
to make sure we send IPI before handling this cpu backlog.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/netdevice.h |    5 +-
 net/core/dev.c            |   73 ++++++++++++++++--------------------
 2 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 649a025..283d3ef 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1389,8 +1389,11 @@ struct softnet_data {
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
-	/* Elements below can be accessed between CPUs for RPS */
 #ifdef CONFIG_RPS
+	unsigned int		rps_ipis_scheduled;
+	unsigned int		rps_select;
+	cpumask_t		rps_mask[2];
+	/* Elements below can be accessed between CPUs for RPS */
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
 	unsigned int		input_queue_head;
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 7abf959..3e6e420 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2347,19 +2347,14 @@ done:
 }
 
 /*
- * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * sofnet_data holds the per-CPU mask of CPUs for which IPIs are scheduled
  * to be sent to kick remote softirq processing.  There are two masks since
- * the sending of IPIs must be done with interrupts enabled.  The select field
+ * the sending of IPIs must be done with interrupts enabled.  The rps_select field
  * indicates the current mask that enqueue_backlog uses to schedule IPIs.
  * select is flipped before net_rps_action is called while still under lock,
  * net_rps_action then uses the non-selected mask to send the IPIs and clears
  * it without conflicting with enqueue_backlog operation.
  */
-struct rps_remote_softirq_cpus {
-	cpumask_t mask[2];
-	int select;
-};
-static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
 
 /* Called from hardirq (IPI) context */
 static void trigger_softirq(void *data)
@@ -2403,10 +2398,10 @@ enqueue:
 		if (napi_schedule_prep(&queue->backlog)) {
 #ifdef CONFIG_RPS
 			if (cpu != smp_processor_id()) {
-				struct rps_remote_softirq_cpus *rcpus =
-				    &__get_cpu_var(rps_remote_softirq_cpus);
+				struct softnet_data *myqueue = &__get_cpu_var(softnet_data);
 
-				cpu_set(cpu, rcpus->mask[rcpus->select]);
+				cpu_set(cpu, myqueue->rps_mask[myqueue->rps_select]);
+				myqueue->rps_ipis_scheduled = 1;
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 				goto enqueue;
 			}
@@ -2911,7 +2906,9 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending  */
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
@@ -3340,24 +3337,36 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-#ifdef CONFIG_RPS
 /*
- * net_rps_action sends any pending IPI's for rps.  This is only called from
- * softirq and interrupts must be enabled.
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
  */
-static void net_rps_action(cpumask_t *mask)
+static void net_rps_action(void)
 {
-	int cpu;
+#ifdef CONFIG_RPS
+	if (percpu_read(softnet_data.rps_ipis_scheduled)) {
+		struct softnet_data *queue = &__get_cpu_var(softnet_data);
+		int cpu, select = queue->rps_select;
+		cpumask_t *mask;
+		
+		queue->rps_ipis_scheduled = 0;
+		queue->rps_select ^= 1;
 
-	/* Send pending IPI's to kick RPS processing on remote cpus. */
-	for_each_cpu_mask_nr(cpu, *mask) {
-		struct softnet_data *queue = &per_cpu(softnet_data, cpu);
-		if (cpu_online(cpu))
-			__smp_call_function_single(cpu, &queue->csd, 0);
-	}
-	cpus_clear(*mask);
-}
+		local_irq_enable();
+
+		mask = &queue->rps_mask[select];
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		for_each_cpu_mask_nr(cpu, *mask) {
+			struct softnet_data *remqueue = &per_cpu(softnet_data, cpu);
+			if (cpu_online(cpu))
+				__smp_call_function_single(cpu, &remqueue->csd, 0);
+		}
+		cpus_clear(*mask);
+	} else
 #endif
+		local_irq_enable();
+}
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3365,10 +3374,6 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
-#ifdef CONFIG_RPS
-	int select;
-	struct rps_remote_softirq_cpus *rcpus;
-#endif
 
 	local_irq_disable();
 
@@ -3431,17 +3436,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-#ifdef CONFIG_RPS
-	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
-	select = rcpus->select;
-	rcpus->select ^= 1;
-
-	local_irq_enable();
-
-	net_rps_action(&rcpus->mask[select]);
-#else
-	local_irq_enable();
-#endif
+	net_rps_action();
 
 #ifdef CONFIG_NET_DMA
 	/*



  parent reply	other threads:[~2010-04-19  9:37 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-07 18:42 rps: question jamal
2010-02-08  5:58 ` Tom Herbert
2010-02-08 15:09   ` jamal
2010-04-14 11:53     ` rps perfomance WAS(Re: " jamal
2010-04-14 17:31       ` Tom Herbert
2010-04-14 18:04         ` Eric Dumazet
2010-04-14 18:53           ` jamal
2010-04-14 19:44             ` Stephen Hemminger
2010-04-14 19:58               ` Eric Dumazet
2010-04-15  8:51                 ` David Miller
2010-04-14 20:22               ` jamal
2010-04-14 20:27                 ` Eric Dumazet
2010-04-14 20:38                   ` jamal
2010-04-14 20:45                   ` Tom Herbert
2010-04-14 20:57                     ` Eric Dumazet
2010-04-14 22:51                       ` Changli Gao
2010-04-14 23:02                         ` Stephen Hemminger
2010-04-15  2:40                           ` Eric Dumazet
2010-04-15  2:50                             ` Changli Gao
2010-04-15  8:57                       ` David Miller
2010-04-15 12:10                       ` jamal
2010-04-15 12:32                         ` Changli Gao
2010-04-15 12:50                           ` jamal
2010-04-15 23:51                             ` Changli Gao
2010-04-15  8:51                 ` David Miller
2010-04-14 20:34               ` Andi Kleen
2010-04-15  8:50               ` David Miller
2010-04-15  8:48             ` David Miller
2010-04-15 11:55               ` jamal
2010-04-15 16:41                 ` Rick Jones
2010-04-15 20:16                   ` jamal
2010-04-15 20:25                     ` Rick Jones
2010-04-15 23:56                     ` Changli Gao
2010-04-16  5:18                       ` Eric Dumazet
2010-04-16  6:02                         ` Changli Gao
2010-04-16  6:28                           ` Tom Herbert
2010-04-16  6:32                           ` Eric Dumazet
2010-04-16 13:42                             ` jamal
2010-04-16  7:15                           ` Andi Kleen
2010-04-16 13:27                             ` jamal
2010-04-16 13:37                               ` Andi Kleen
2010-04-16 13:58                                 ` jamal
2010-04-16 13:21                         ` jamal
2010-04-16 13:34                           ` Changli Gao
2010-04-16 13:49                             ` jamal
2010-04-16 14:10                               ` Changli Gao
2010-04-16 14:43                                 ` jamal
2010-04-16 14:58                                   ` Changli Gao
2010-04-19 12:48                                     ` jamal
2010-04-17  7:35                           ` Eric Dumazet
2010-04-17  8:43                             ` Tom Herbert
2010-04-17  9:23                               ` Eric Dumazet
2010-04-17 14:27                                 ` Eric Dumazet
2010-04-17 17:26                                   ` Tom Herbert
2010-04-17 14:17                               ` [PATCH net-next-2.6] net: remove time limit in process_backlog() Eric Dumazet
2010-04-18  9:36                                 ` David Miller
2010-04-17 17:31                             ` rps perfomance WAS(Re: rps: question jamal
2010-04-18  9:39                               ` Eric Dumazet
2010-04-18 11:34                                 ` Eric Dumazet
2010-04-19  2:09                                   ` jamal
2010-04-19  9:37                                   ` Eric Dumazet [this message]
2010-04-19  9:48                                     ` [RFC] rps: shortcut net_rps_action() Changli Gao
2010-04-19 12:14                                       ` Eric Dumazet
2010-04-19 12:28                                         ` Changli Gao
2010-04-19 13:27                                           ` Eric Dumazet
2010-04-19 14:22                                             ` Eric Dumazet
2010-04-19 15:07                                               ` [PATCH net-next-2.6] " Eric Dumazet
2010-04-19 16:02                                                 ` Tom Herbert
2010-04-19 20:21                                                 ` David Miller
2010-04-20  7:17                                                   ` [PATCH net-next-2.6] rps: cleanups Eric Dumazet
2010-04-20  8:18                                                     ` David Miller
2010-04-19 23:56                                                 ` [PATCH net-next-2.6] rps: shortcut net_rps_action() Changli Gao
2010-04-20  0:32                                                   ` Changli Gao
2010-04-20  5:55                                                     ` Eric Dumazet
2010-04-20 12:02                                   ` rps perfomance WAS(Re: rps: question jamal
2010-04-20 13:13                                     ` Eric Dumazet
     [not found]                                       ` <1271853570.4032.21.camel@bigi>
2010-04-21 19:01                                         ` Eric Dumazet
2010-04-22  1:27                                           ` Changli Gao
2010-04-22 12:12                                           ` jamal
2010-04-25  2:31                                             ` Changli Gao
2010-04-26 11:35                                               ` jamal
2010-04-26 13:35                                                 ` Changli Gao
2010-04-21 21:53                                         ` Rick Jones
2010-04-16 15:57             ` Tom Herbert
2010-04-14 18:53       ` Stephen Hemminger
2010-04-15  8:42       ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1271669822.16881.7520.camel@edumazet-laptop \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=therbert@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.