[PATCH net-next 1/2] macvtap: avoid hash calculating for single queue

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue
@ 2016-07-15  7:46 Jason Wang
  2016-07-15  7:46 ` [PATCH net-next 2/2] macvtap: switch to use skb array Jason Wang
  2016-07-15 21:40 ` [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue David Miller
  0 siblings, 2 replies; 4+ messages in thread
From: Jason Wang @ 2016-07-15  7:46 UTC (permalink / raw)
  To: davem, netdev, linux-kernel; +Cc: mst, Jason Wang

We decide the rxq through calculating its hash which is not necessary
if we only have one rx queue. So this patch skip this and just return
queue 0. Test shows 22% improving on guest rx pps.

Before: 1201504 pkts/s
After:  1472731 pkts/s

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/macvtap.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 95a1332..2476923 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -299,6 +299,9 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
 	if (!numvtaps)
 		goto out;
 
+	if (numvtaps == 1)
+		goto single;
+
 	/* Check if we can use flow to select a queue */
 	rxq = skb_get_hash(skb);
 	if (rxq) {
@@ -316,6 +319,7 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
 		goto out;
 	}
 
+single:
 	tap = rcu_dereference(vlan->taps[0]);
 out:
 	return tap;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH net-next 2/2] macvtap: switch to use skb array
  2016-07-15  7:46 [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue Jason Wang
@ 2016-07-15  7:46 ` Jason Wang
  2016-07-15 21:41   ` David Miller
  2016-07-15 21:40 ` [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue David Miller
  1 sibling, 1 reply; 4+ messages in thread
From: Jason Wang @ 2016-07-15  7:46 UTC (permalink / raw)
  To: davem, netdev, linux-kernel; +Cc: mst, Jason Wang

This patch switch to use skb array instead of sk_receive_queue to
avoid spinlock contentions. Tests shows about 21% improvements for
guest rx pps:

Before: 1472731 pkts/s
After:  1786289 pkts/s

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/macvtap.c | 82 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 71 insertions(+), 11 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 2476923..9204d19 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -21,6 +21,7 @@
 #include <net/rtnetlink.h>
 #include <net/sock.h>
 #include <linux/virtio_net.h>
+#include <linux/skb_array.h>
 
 /*
  * A macvtap queue is the central object of this driver, it connects
@@ -43,6 +44,7 @@ struct macvtap_queue {
 	u16 queue_index;
 	bool enabled;
 	struct list_head next;
+	struct skb_array skb_array;
 };
 
 #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
@@ -273,6 +275,7 @@ static void macvtap_put_queue(struct macvtap_queue *q)
 	rtnl_unlock();
 
 	synchronize_rcu();
+	skb_array_cleanup(&q->skb_array);
 	sock_put(&q->sk);
 }
 
@@ -366,7 +369,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
 	if (!q)
 		return RX_HANDLER_PASS;
 
-	if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
+	if (__skb_array_full(&q->skb_array))
 		goto drop;
 
 	skb_push(skb, ETH_HLEN);
@@ -384,7 +387,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
 			goto drop;
 
 		if (!segs) {
-			skb_queue_tail(&q->sk.sk_receive_queue, skb);
+			if (skb_array_produce(&q->skb_array, skb))
+				goto drop;
 			goto wake_up;
 		}
 
@@ -393,7 +397,11 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
 			struct sk_buff *nskb = segs->next;
 
 			segs->next = NULL;
-			skb_queue_tail(&q->sk.sk_receive_queue, segs);
+			if (skb_array_produce(&q->skb_array, segs)) {
+				kfree_skb(segs);
+				kfree_skb_list(nskb);
+				break;
+			}
 			segs = nskb;
 		}
 	} else {
@@ -406,7 +414,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
 		    !(features & NETIF_F_CSUM_MASK) &&
 		    skb_checksum_help(skb))
 			goto drop;
-		skb_queue_tail(&q->sk.sk_receive_queue, skb);
+		if (skb_array_produce(&q->skb_array, skb))
+			goto drop;
 	}
 
 wake_up:
@@ -523,7 +532,11 @@ static void macvtap_sock_write_space(struct sock *sk)
 
 static void macvtap_sock_destruct(struct sock *sk)
 {
-	skb_queue_purge(&sk->sk_receive_queue);
+	struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
+	struct sk_buff *skb;
+
+	while ((skb = skb_array_consume(&q->skb_array)) != NULL)
+		kfree(skb);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
@@ -536,13 +549,13 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	rtnl_lock();
 	dev = dev_get_by_macvtap_minor(iminor(inode));
 	if (!dev)
-		goto out;
+		goto err;
 
 	err = -ENOMEM;
 	q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
 					     &macvtap_proto, 0);
 	if (!q)
-		goto out;
+		goto err;
 
 	RCU_INIT_POINTER(q->sock.wq, &q->wq);
 	init_waitqueue_head(&q->wq.wait);
@@ -566,11 +579,24 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
 		sock_set_flag(&q->sk, SOCK_ZEROCOPY);
 
+	err = -ENOMEM;
+	if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
+		goto err_array;
+
 	err = macvtap_set_queue(dev, file, q);
 	if (err)
-		sock_put(&q->sk);
+		goto err_queue;
 
-out:
+	dev_put(dev);
+
+	rtnl_unlock();
+	return err;
+
+err_queue:
+	skb_array_cleanup(&q->skb_array);
+err_array:
+	sock_put(&q->sk);
+err:
 	if (dev)
 		dev_put(dev);
 
@@ -596,7 +622,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
 	mask = 0;
 	poll_wait(file, &q->wq.wait, wait);
 
-	if (!skb_queue_empty(&q->sk.sk_receive_queue))
+	if (!skb_array_empty(&q->skb_array))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sock_writeable(&q->sk) ||
@@ -856,7 +882,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q,
 					TASK_INTERRUPTIBLE);
 
 		/* Read frames from the queue */
-		skb = skb_dequeue(&q->sk.sk_receive_queue);
+		skb = skb_array_consume(&q->skb_array);
 		if (skb)
 			break;
 		if (noblock) {
@@ -1180,10 +1206,18 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m,
 	return ret;
 }
 
+static int macvtap_peek_len(struct socket *sock)
+{
+	struct macvtap_queue *q = container_of(sock, struct macvtap_queue,
+					       sock);
+	return skb_array_peek_len(&q->skb_array);
+}
+
 /* Ops structure to mimic raw sockets with tun */
 static const struct proto_ops macvtap_socket_ops = {
 	.sendmsg = macvtap_sendmsg,
 	.recvmsg = macvtap_recvmsg,
+	.peek_len = macvtap_peek_len,
 };
 
 /* Get an underlying socket object from tun file.  Returns error unless file is
@@ -1202,6 +1236,28 @@ struct socket *macvtap_get_socket(struct file *file)
 }
 EXPORT_SYMBOL_GPL(macvtap_get_socket);
 
+static int macvtap_queue_resize(struct macvlan_dev *vlan)
+{
+	struct net_device *dev = vlan->dev;
+	struct macvtap_queue *q;
+	struct skb_array **arrays;
+	int n = vlan->numqueues;
+	int ret, i = 0;
+
+	arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
+	if (!arrays)
+		return -ENOMEM;
+
+	list_for_each_entry(q, &vlan->queue_list, next)
+		arrays[i++] = &q->skb_array;
+
+	ret = skb_array_resize_multiple(arrays, n,
+					dev->tx_queue_len, GFP_KERNEL);
+
+	kfree(arrays);
+	return ret;
+}
+
 static int macvtap_device_event(struct notifier_block *unused,
 				unsigned long event, void *ptr)
 {
@@ -1249,6 +1305,10 @@ static int macvtap_device_event(struct notifier_block *unused,
 		device_destroy(&macvtap_class, devt);
 		macvtap_free_minor(vlan);
 		break;
+	case NETDEV_CHANGE_TX_QUEUE_LEN:
+		if (macvtap_queue_resize(vlan))
+			return NOTIFY_BAD;
+		break;
 	}
 
 	return NOTIFY_DONE;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue
  2016-07-15  7:46 [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue Jason Wang
  2016-07-15  7:46 ` [PATCH net-next 2/2] macvtap: switch to use skb array Jason Wang
@ 2016-07-15 21:40 ` David Miller
  1 sibling, 0 replies; 4+ messages in thread
From: David Miller @ 2016-07-15 21:40 UTC (permalink / raw)
  To: jasowang; +Cc: netdev, linux-kernel, mst

From: Jason Wang <jasowang@redhat.com>
Date: Fri, 15 Jul 2016 03:46:30 -0400

> We decide the rxq through calculating its hash which is not necessary
> if we only have one rx queue. So this patch skip this and just return
> queue 0. Test shows 22% improving on guest rx pps.
> 
> Before: 1201504 pkts/s
> After:  1472731 pkts/s
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Applied.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 2/2] macvtap: switch to use skb array
  2016-07-15  7:46 ` [PATCH net-next 2/2] macvtap: switch to use skb array Jason Wang
@ 2016-07-15 21:41   ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2016-07-15 21:41 UTC (permalink / raw)
  To: jasowang; +Cc: netdev, linux-kernel, mst

From: Jason Wang <jasowang@redhat.com>
Date: Fri, 15 Jul 2016 03:46:31 -0400

> This patch switch to use skb array instead of sk_receive_queue to
> avoid spinlock contentions. Tests shows about 21% improvements for
> guest rx pps:
> 
> Before: 1472731 pkts/s
> After:  1786289 pkts/s
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Looks great, nice work.

Applied, thanks Jason.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-07-15 21:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-15  7:46 [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue Jason Wang
2016-07-15  7:46 ` [PATCH net-next 2/2] macvtap: switch to use skb array Jason Wang
2016-07-15 21:41   ` David Miller
2016-07-15 21:40 ` [PATCH net-next 1/2] macvtap: avoid hash calculating for single queue David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.