All of lore.kernel.org
 help / color / mirror / Atom feed
From: Charles-Francois Natali <cf.natali@gmail.com>
To: wireguard@lists.zx2c4.com
Cc: Jason@zx2c4.com, Charles-Francois Natali <cf.natali@gmail.com>
Subject: [PATCH] WireGuard: restrict packet handling to non-isolated CPUs.
Date: Tue,  5 Apr 2022 22:21:29 +0100	[thread overview]
Message-ID: <20220405212129.2270-1-cf.natali@gmail.com> (raw)

WireGuard currently uses round-robin to dispatch the handling of
packets, handling them on all online CPUs, including isolated ones
(isolcpus).

This is unfortunate because it causes significant latency on isolated
CPUs - see e.g. below over 240 usec:

kworker/47:1-2373323 [047] 243644.756405: funcgraph_entry: |
process_one_work() {
    kworker/47:1-2373323 [047] 243644.756406: funcgraph_entry: |
wg_packet_decrypt_worker() {
[...]
    kworker/47:1-2373323 [047] 243644.756647: funcgraph_exit: 0.591 us | }
    kworker/47:1-2373323 [047] 243644.756647: funcgraph_exit: ! 242.655 us | }

Instead, restrict to non-isolated CPUs.

Example:

~# cat /sys/devices/system/cpu/isolated
3
~# /usr/share/doc/wireguard-tools/examples/ncat-client-server/client.sh
~# ping 192.168.4.1

Before - corresponding workqueues are executed on all CPUs:

~# trace-cmd record -p function -l wg_packet_decrypt_worker -- sleep 10
  plugin 'function'
CPU0 data recorded at offset=0x7d6000
    4096 bytes in size
CPU1 data recorded at offset=0x7d7000
    4096 bytes in size
CPU2 data recorded at offset=0x7d8000
    4096 bytes in size
CPU3 data recorded at offset=0x7d9000
    4096 bytes in size
~# trace-cmd report
cpus=4
     kworker/3:1-52    [003]    49.784353: function:
wg_packet_decrypt_worker
     kworker/0:1-17    [000]    50.782879: function:
wg_packet_decrypt_worker
     kworker/1:3-162   [001]    51.783044: function:
wg_packet_decrypt_worker
     kworker/2:1-56    [002]    52.782159: function:
wg_packet_decrypt_worker
     kworker/3:1-52    [003]    53.780919: function:
wg_packet_decrypt_worker
     kworker/0:0-6     [000]    54.781755: function:
wg_packet_decrypt_worker
     kworker/1:3-162   [001]    55.781273: function:
wg_packet_decrypt_worker
     kworker/2:1-56    [002]    56.781946: function:
wg_packet_decrypt_worker
     kworker/3:1-52    [003]    57.781010: function:
wg_packet_decrypt_worker
     kworker/0:0-6     [000]    58.782097: function:
wg_packet_decrypt_worker
~#

After - isolated CPU 3 is excluded:

~# trace-cmd record -p function -l wg_packet_decrypt_worker -- sleep 10
  plugin 'function'
CPU0 data recorded at offset=0x7d7000
    4096 bytes in size
CPU1 data recorded at offset=0x7d8000
    4096 bytes in size
CPU2 data recorded at offset=0x7d9000
    4096 bytes in size
CPU3 data recorded at offset=0x7da000
    0 bytes in size
~# trace-cmd report
CPU 3 is empty
cpus=4
     kworker/1:2-66    [001]   291.800063: function:
wg_packet_decrypt_worker
     kworker/2:2-143   [002]   292.800266: function:
wg_packet_decrypt_worker
     kworker/0:2-145   [000]   293.801778: function:
wg_packet_decrypt_worker
     kworker/1:4-261   [001]   294.803411: function:
wg_packet_decrypt_worker
     kworker/2:2-143   [002]   295.804068: function:
wg_packet_decrypt_worker
     kworker/0:2-145   [000]   296.806057: function:
wg_packet_decrypt_worker
     kworker/1:2-66    [001]   297.810686: function:
wg_packet_decrypt_worker
     kworker/2:2-143   [002]   298.811602: function:
wg_packet_decrypt_worker
     kworker/0:2-145   [000]   299.812790: function:
wg_packet_decrypt_worker
     kworker/1:4-261   [001]   300.813076: function:
wg_packet_decrypt_worker
~#

Signed-off-by: Charles-Francois Natali <cf.natali@gmail.com>
---
 drivers/net/wireguard/queueing.h | 59 +++++++++++++++++++++++++-------
 drivers/net/wireguard/receive.c  |  2 +-
 2 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index 583adb37e..106a2686c 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -11,6 +11,7 @@
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/sched/isolation.h>
 #include <net/ip_tunnels.h>
 
 struct wg_device;
@@ -102,16 +103,50 @@ static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
 	skb_reset_inner_headers(skb);
 }
 
-static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
+/* We only want to dispatch work to housekeeping CPUs, ignoring isolated ones.
+ */
+static inline const struct cpumask *wg_cpumask_housekeeping(void)
+{
+	return housekeeping_cpumask(HK_FLAG_DOMAIN);
+}
+
+static inline int wg_cpumask_test_cpu(int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_online_mask) &&
+		cpumask_test_cpu(cpu, wg_cpumask_housekeeping());
+}
+
+static inline unsigned int wg_cpumask_first(void)
+{
+	return cpumask_first_and(cpu_online_mask, wg_cpumask_housekeeping());
+}
+
+static inline unsigned int wg_cpumask_next(int n)
+{
+	return cpumask_next_and(n, cpu_online_mask, wg_cpumask_housekeeping());
+}
+
+static inline unsigned int wg_cpumask_weight(void)
+{
+	int cpu;
+	int weight = 0;
+
+	for_each_cpu_and(cpu, cpu_online_mask, wg_cpumask_housekeeping()) {
+		++weight;
+	}
+
+	return weight;
+}
+
+static inline int wg_cpumask_choose_eligible(int *stored_cpu, unsigned int id)
 {
 	unsigned int cpu = *stored_cpu, cpu_index, i;
 
-	if (unlikely(cpu == nr_cpumask_bits ||
-		     !cpumask_test_cpu(cpu, cpu_online_mask))) {
-		cpu_index = id % cpumask_weight(cpu_online_mask);
-		cpu = cpumask_first(cpu_online_mask);
+	if (unlikely(cpu == nr_cpumask_bits || !wg_cpumask_test_cpu(cpu))) {
+		cpu_index = id % wg_cpumask_weight();
+		cpu = wg_cpumask_first();
 		for (i = 0; i < cpu_index; ++i)
-			cpu = cpumask_next(cpu, cpu_online_mask);
+			cpu = wg_cpumask_next(cpu);
 		*stored_cpu = cpu;
 	}
 	return cpu;
@@ -124,13 +159,13 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
  * a bit slower, and it doesn't seem like this potential race actually
  * introduces any performance loss, so we live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_eligible(int *next)
 {
 	int cpu = *next;
 
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	while (unlikely(!wg_cpumask_test_cpu(cpu)))
+		cpu = wg_cpumask_next(cpu) % nr_cpumask_bits;
+	*next = wg_cpumask_next(cpu) % nr_cpumask_bits;
 	return cpu;
 }
 
@@ -173,7 +208,7 @@ static inline int wg_queue_enqueue_per_device_and_peer(
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_eligible(next_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
@@ -188,7 +223,7 @@ static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet
 	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
 
 	atomic_set_release(&PACKET_CB(skb)->state, state);
-	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
+	queue_work_on(wg_cpumask_choose_eligible(&peer->serial_work_cpu, peer->internal_id),
 		      peer->device->packet_crypt_wq, &peer->transmit_packet_work);
 	wg_peer_put(peer);
 }
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 7b8df406c..2d5d903d0 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -572,7 +572,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
 			goto err;
 		}
 		atomic_inc(&wg->handshake_queue_len);
-		cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu);
+		cpu = wg_cpumask_next_eligible(&wg->handshake_queue.last_cpu);
 		/* Queues up a call to packet_process_queued_handshake_packets(skb): */
 		queue_work_on(cpu, wg->handshake_receive_wq,
 			      &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work);
-- 
2.30.2


             reply	other threads:[~2022-04-21 23:55 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-05 21:21 Charles-Francois Natali [this message]
2022-04-22  0:02 ` [PATCH] WireGuard: restrict packet handling to non-isolated CPUs Jason A. Donenfeld
2022-04-22  0:40   ` Stephen Hemminger
2022-04-22 22:23   ` Charles-François Natali
2022-04-23  1:08     ` Jason A. Donenfeld

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220405212129.2270-1-cf.natali@gmail.com \
    --to=cf.natali@gmail.com \
    --cc=Jason@zx2c4.com \
    --cc=wireguard@lists.zx2c4.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.