linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Jason Wang <jasowang@redhat.com>
Subject: [PATCH 1/3] tuntap: rx batching
Date: Wed,  9 Nov 2016 15:38:31 +0800	[thread overview]
Message-ID: <1478677113-13126-1-git-send-email-jasowang@redhat.com> (raw)

The backlog was used for tuntap rx, but it can only process one packet
at a time since it is scheduled synchronously during sendmsg() in
process context. This leads to bad cache utilization, so this patch
tries to batch packets before calling rx NAPI. This is done through:

- accept MSG_MORE as a hint from the sendmsg() caller; if it is set,
  batch the packets temporarily in a linked list and submit them all
  once MSG_MORE is cleared.
- implement a tuntap-specific NAPI handler for processing this kind of
  possible batching. (This could be done by extending the backlog to
  support a list of skbs, but using a tun-specific handler looks
  cleaner and is easier to extend in the future.)

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 1588469..d40583b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -74,6 +74,7 @@
 #include <linux/skb_array.h>
 
 #include <asm/uaccess.h>
+#include <linux/interrupt.h>
 
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
@@ -169,6 +170,8 @@ struct tun_file {
 	struct list_head next;
 	struct tun_struct *detached;
 	struct skb_array tx_array;
+	struct napi_struct napi;
+	struct sk_buff_head process_queue;
 };
 
 struct tun_flow_entry {
@@ -522,6 +525,8 @@ static void tun_queue_purge(struct tun_file *tfile)
 	while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
 		kfree_skb(skb);
 
+	skb_queue_purge(&tfile->sk.sk_write_queue);
+	skb_queue_purge(&tfile->process_queue);
 	skb_queue_purge(&tfile->sk.sk_error_queue);
 }
 
@@ -532,6 +537,11 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 
 	tun = rtnl_dereference(tfile->tun);
 
+	if (tun && clean) {
+		napi_disable(&tfile->napi);
+		netif_napi_del(&tfile->napi);
+	}
+
 	if (tun && !tfile->detached) {
 		u16 index = tfile->queue_index;
 		BUG_ON(index >= tun->numqueues);
@@ -587,6 +597,7 @@ static void tun_detach_all(struct net_device *dev)
 
 	for (i = 0; i < n; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
+		napi_disable(&tfile->napi);
 		BUG_ON(!tfile);
 		tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
 		tfile->socket.sk->sk_data_ready(tfile->socket.sk);
@@ -603,6 +614,7 @@ static void tun_detach_all(struct net_device *dev)
 	synchronize_net();
 	for (i = 0; i < n; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
+		netif_napi_del(&tfile->napi);
 		/* Drop read queue */
 		tun_queue_purge(tfile);
 		sock_put(&tfile->sk);
@@ -618,6 +630,41 @@ static void tun_detach_all(struct net_device *dev)
 		module_put(THIS_MODULE);
 }
 
+static int tun_poll(struct napi_struct *napi, int budget)
+{
+	struct tun_file *tfile = container_of(napi, struct tun_file, napi);
+	struct sk_buff_head *input_queue =
+	       &tfile->socket.sk->sk_write_queue;
+	struct sk_buff *skb;
+	unsigned int received = 0;
+
+	while (1) {
+		while ((skb = __skb_dequeue(&tfile->process_queue))) {
+			netif_receive_skb(skb);
+			if (++received >= budget)
+				return received;
+		}
+
+		spin_lock(&input_queue->lock);
+		if (skb_queue_empty(input_queue)) {
+			spin_unlock(&input_queue->lock);
+			break;
+		}
+		skb_queue_splice_tail_init(input_queue, &tfile->process_queue);
+		spin_unlock(&input_queue->lock);
+	}
+
+	if (received < budget) {
+		napi_complete(napi);
+		if (skb_peek(&tfile->socket.sk->sk_write_queue) &&
+		    unlikely(napi_schedule_prep(napi))) {
+			__napi_schedule(napi);
+		}
+	}
+
+	return received;
+}
+
 static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
 {
 	struct tun_file *tfile = file->private_data;
@@ -666,9 +713,11 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
 
 	if (tfile->detached)
 		tun_enable_queue(tfile);
-	else
+	else {
 		sock_hold(&tfile->sk);
-
+		netif_napi_add(tun->dev, &tfile->napi, tun_poll, 64);
+		napi_enable(&tfile->napi);
+	}
 	tun_set_real_num_queues(tun);
 
 	/* device is allowed to go away first, so no need to hold extra
@@ -1150,7 +1199,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
 /* Get packet from user space buffer */
 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 			    void *msg_control, struct iov_iter *from,
-			    int noblock)
+			    int noblock, bool more)
 {
 	struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
 	struct sk_buff *skb;
@@ -1296,7 +1345,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	skb_probe_transport_header(skb, 0);
 
 	rxhash = skb_get_hash(skb);
-	netif_rx_ni(skb);
+	skb_queue_tail(&tfile->socket.sk->sk_write_queue, skb);
+
+	if (!more) {
+		local_bh_disable();
+		napi_schedule(&tfile->napi);
+		local_bh_enable();
+	}
 
 	stats = get_cpu_ptr(tun->pcpu_stats);
 	u64_stats_update_begin(&stats->syncp);
@@ -1319,7 +1374,8 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (!tun)
 		return -EBADFD;
 
-	result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK);
+	result = tun_get_user(tun, tfile, NULL, from,
+			      file->f_flags & O_NONBLOCK, false);
 
 	tun_put(tun);
 	return result;
@@ -1579,7 +1635,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		return -EBADFD;
 
 	ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
-			   m->msg_flags & MSG_DONTWAIT);
+			   m->msg_flags & MSG_DONTWAIT,
+			   m->msg_flags & MSG_MORE);
 	tun_put(tun);
 	return ret;
 }
@@ -2336,6 +2393,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	file->private_data = tfile;
 	INIT_LIST_HEAD(&tfile->next);
 
+	skb_queue_head_init(&tfile->process_queue);
+
 	sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
 
 	return 0;
-- 
2.7.4

             reply	other threads:[~2016-11-09  7:38 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-09  7:38 Jason Wang [this message]
2016-11-09  7:38 ` [PATCH 2/3] vhost: better detection of available buffers Jason Wang
2016-11-09 19:57   ` Michael S. Tsirkin
2016-11-11  2:18     ` Jason Wang
2016-11-11  3:41       ` Michael S. Tsirkin
2016-11-11  4:18         ` Jason Wang
2016-11-11 16:20           ` Michael S. Tsirkin
2016-11-15  3:16             ` Jason Wang
2016-11-15  3:28               ` Michael S. Tsirkin
2016-11-15  8:00                 ` Jason Wang
2016-11-15 14:46                   ` Michael S. Tsirkin
2016-11-09  7:38 ` [PATCH 3/3] vhost_net: tx support batching Jason Wang
2016-11-09 20:05   ` Michael S. Tsirkin
2016-11-11  2:27     ` Jason Wang
2016-11-09 16:38 ` [PATCH 1/3] tuntap: rx batching Michael S. Tsirkin
2016-11-11  2:07   ` Jason Wang
2016-11-11  3:31     ` Michael S. Tsirkin
2016-11-11  4:10       ` Jason Wang
2016-11-11  4:17       ` John Fastabend
2016-11-11  4:28         ` Jason Wang
2016-11-11  4:45           ` John Fastabend
2016-11-11 16:20           ` Michael S. Tsirkin
2016-11-15  3:14             ` Jason Wang
2016-11-15  3:41               ` Michael S. Tsirkin
2016-11-15  8:08                 ` Jason Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1478677113-13126-1-git-send-email-jasowang@redhat.com \
    --to=jasowang@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).