[PATCH v2] macvlan: use per-cpu queues for broadcast and multicast packets

From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
To: netdev@vger.kernel.org, "David S. Miller" <davem@davemloft.net>
Cc: Vadim Fedorenko <junk@yandex-team.ru>,
	Jiri Pirko <jiri@resnulli.us>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2] macvlan: use per-cpu queues for broadcast and multicast packets
Date: Wed, 19 Dec 2018 19:43:10 +0300	[thread overview]
Message-ID: <154523779011.671635.11120127931100080368.stgit@buzz> (raw)
In-Reply-To: <154116580015.953950.9450253307804393677.stgit@buzz>

Currently macvlan has a single per-port queue for broadcast and multicast
packets. This disrupts the order of packets when flows from different CPUs
are mixed.

This patch replaces that queue with a single set of per-cpu queues.
A pointer to the macvlan port is passed in the skb control block.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Vadim Fedorenko <vfedorenko@yandex-team.ru>
Tested-by: Vadim Fedorenko <vfedorenko@yandex-team.ru>
---
 drivers/net/macvlan.c |   65 +++++++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0da3d36b283b..d6e74bd01571 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -46,8 +46,6 @@ struct macvlan_port {
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
-	struct sk_buff_head	bc_queue;
-	struct work_struct	bc_work;
 	u32			flags;
 	int			count;
 	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
@@ -55,6 +53,11 @@ struct macvlan_port {
 	unsigned char           perm_addr[ETH_ALEN];
 };
 
+struct macvlan_bc_work {
+	struct sk_buff_head	bc_queue;
+	struct work_struct	bc_work;
+};
+
 struct macvlan_source_entry {
 	struct hlist_node	hlist;
 	struct macvlan_dev	*vlan;
@@ -63,6 +66,7 @@ struct macvlan_source_entry {
 };
 
 struct macvlan_skb_cb {
+	const struct macvlan_port *port;
 	const struct macvlan_dev *src;
 };
 
@@ -295,20 +299,23 @@ static void macvlan_broadcast(struct sk_buff *skb,
 	}
 }
 
+static DEFINE_PER_CPU(struct macvlan_bc_work, macvlan_bc_work);
+
 static void macvlan_process_broadcast(struct work_struct *w)
 {
-	struct macvlan_port *port = container_of(w, struct macvlan_port,
+	struct macvlan_bc_work *work = container_of(w, struct macvlan_bc_work,
 						 bc_work);
 	struct sk_buff *skb;
 	struct sk_buff_head list;
 
 	__skb_queue_head_init(&list);
 
-	spin_lock_bh(&port->bc_queue.lock);
-	skb_queue_splice_tail_init(&port->bc_queue, &list);
-	spin_unlock_bh(&port->bc_queue.lock);
+	spin_lock_bh(&work->bc_queue.lock);
+	skb_queue_splice_tail_init(&work->bc_queue, &list);
+	spin_unlock_bh(&work->bc_queue.lock);
 
 	while ((skb = __skb_dequeue(&list))) {
+		const struct macvlan_port *port = MACVLAN_SKB_CB(skb)->port;
 		const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;
 
 		rcu_read_lock();
@@ -345,6 +352,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port,
 				      const struct macvlan_dev *src,
 				      struct sk_buff *skb)
 {
+	struct macvlan_bc_work *work;
 	struct sk_buff *nskb;
 	int err = -ENOMEM;
 
@@ -352,24 +360,30 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port,
 	if (!nskb)
 		goto err;
 
+	MACVLAN_SKB_CB(nskb)->port = port;
 	MACVLAN_SKB_CB(nskb)->src = src;
 
-	spin_lock(&port->bc_queue.lock);
-	if (skb_queue_len(&port->bc_queue) < MACVLAN_BC_QUEUE_LEN) {
+	work = get_cpu_ptr(&macvlan_bc_work);
+
+	spin_lock(&work->bc_queue.lock);
+	if (skb_queue_len(&work->bc_queue) < MACVLAN_BC_QUEUE_LEN) {
 		if (src)
 			dev_hold(src->dev);
-		__skb_queue_tail(&port->bc_queue, nskb);
+		__skb_queue_tail(&work->bc_queue, nskb);
 		err = 0;
 	}
-	spin_unlock(&port->bc_queue.lock);
+	spin_unlock(&work->bc_queue.lock);
 
 	if (err)
 		goto free_nskb;
 
-	schedule_work(&port->bc_work);
+	schedule_work_on(smp_processor_id(), &work->bc_work);
+	put_cpu_ptr(work);
+
 	return;
 
 free_nskb:
+	put_cpu_ptr(work);
 	kfree_skb(nskb);
 err:
 	atomic_long_inc(&skb->dev->rx_dropped);
@@ -1171,9 +1185,6 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_source_hash[i]);
 
-	skb_queue_head_init(&port->bc_queue);
-	INIT_WORK(&port->bc_work, macvlan_process_broadcast);
-
 	err = netdev_rx_handler_register(dev, macvlan_handle_frame, port);
 	if (err)
 		kfree(port);
@@ -1185,24 +1196,16 @@ static int macvlan_port_create(struct net_device *dev)
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = macvlan_port_get_rtnl(dev);
-	struct sk_buff *skb;
+	int cpu;
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
 
 	/* After this point, no packet can schedule bc_work anymore,
-	 * but we need to cancel it and purge left skbs if any.
+	 * but we need to flush work.
 	 */
-	cancel_work_sync(&port->bc_work);
-
-	while ((skb = __skb_dequeue(&port->bc_queue))) {
-		const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;
-
-		if (src)
-			dev_put(src->dev);
-
-		kfree_skb(skb);
-	}
+	for_each_possible_cpu(cpu)
+		flush_work(per_cpu_ptr(&macvlan_bc_work.bc_work, cpu));
 
 	/* If the lower device address has been changed by passthru
 	 * macvlan, put it back.
@@ -1705,7 +1708,15 @@ static struct notifier_block macvlan_notifier_block __read_mostly = {
 
 static int __init macvlan_init_module(void)
 {
-	int err;
+	int err, cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct macvlan_bc_work *work;
+
+		work = per_cpu_ptr(&macvlan_bc_work, cpu);
+		skb_queue_head_init(&work->bc_queue);
+		INIT_WORK(&work->bc_work, macvlan_process_broadcast);
+	}
 
 	register_netdevice_notifier(&macvlan_notifier_block);