From: Jason Wang <jasowang@redhat.com>
To: davem@davemloft.net, mst@redhat.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: Jason Wang <jasowang@redhat.com>
Subject: [PATCH V5 3/3] tuntap: allow polling/writing/reading when detached
Date: Wed, 16 Jan 2013 18:34:01 +0800	[thread overview]
Message-ID: <1358332441-26859-4-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1358332441-26859-1-git-send-email-jasowang@redhat.com>

We currently forbid polling, writing and reading when the file is detached,
which complicates things for the user in several cases:

- when the guest passes some buffers to vhost/qemu and then disables some
  queues, vhost/qemu needs to do its own cleanup on those buffers, which can
  be complex. This becomes simple if the user can still write to a disabled
  queue: such a write still passes the packet to the kernel, while a read
  gets nothing (see the userspace sketch after this list).
- align the polling behavior with macvtap, whose poll never fails once the
  queue has been created. This simplifies the polling error handling of its
  users (e.g. vhost).
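
For illustration, a minimal userspace sketch of the intended usage follows.
It is not part of the patch; it assumes fd is a tap queue fd that was
previously attached to a multiqueue device via TUNSETQUEUE/IFF_ATTACH_QUEUE,
and error handling is reduced to early returns:

#include <string.h>
#include <unistd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Illustration only: disable a queue, then keep using its fd. */
static int use_disabled_queue(int fd, const void *pkt, size_t len)
{
	struct ifreq ifr;
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_DETACH_QUEUE;
	if (ioctl(fd, TUNSETQUEUE, &ifr) < 0)	/* disable this queue */
		return -1;

	/* With this patch, poll() no longer fails on the disabled queue,
	 * write() still passes the packet to the kernel, and a read()
	 * would get nothing. */
	if (poll(&pfd, 1, 0) < 0)
		return -1;
	if (write(fd, pkt, len) < 0)
		return -1;

	ifr.ifr_flags = IFF_ATTACH_QUEUE;
	return ioctl(fd, TUNSETQUEUE, &ifr);	/* re-enable it later */
}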

In order to achieve this, tfile->tun is no longer set to NULL when the queue
is merely detached (only when the file is really closed), and tfile->detached
is converted to an RCU-protected pointer so that the data path can check
whether the file is detached in a lockless manner. This check is used to
prevent the flow caches from being updated for a detached queue.
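
The pointer itself follows the usual RCU pattern: the control path (which
runs under RTNL) publishes changes with rcu_assign_pointer() and reads with
rtnl_dereference(), while the data path only reads it under rcu_read_lock().
A condensed, stand-alone sketch of that pattern (the example_* names are made
up for illustration; this is not the driver code itself):

#include <linux/types.h>
#include <linux/rcupdate.h>

struct tun_struct;			/* opaque for this sketch */

struct example_file {
	struct tun_struct __rcu *detached;
};

/* control path: caller holds RTNL */
static void example_disable(struct example_file *f, struct tun_struct *tun)
{
	rcu_assign_pointer(f->detached, tun);
}

/* data path: lockless check, e.g. before updating the flow cache */
static bool example_is_detached(struct example_file *f)
{
	bool ret;

	rcu_read_lock();
	ret = rcu_dereference(f->detached) != NULL;
	rcu_read_unlock();

	return ret;
}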

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |   43 +++++++++++++++++++++++++------------------
 1 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c81680d..3f011e0 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -139,7 +139,7 @@ struct tun_file {
 	unsigned int flags;
 	u16 queue_index;
 	struct list_head next;
-	struct tun_struct *detached;
+	struct tun_struct __rcu *detached;
 };
 
 struct tun_flow_entry {
@@ -295,11 +295,12 @@ static void tun_flow_cleanup(unsigned long data)
 }
 
 static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
-			    u16 queue_index)
+			    struct tun_file *tfile)
 {
 	struct hlist_head *head;
 	struct tun_flow_entry *e;
 	unsigned long delay = tun->ageing_time;
+	u16 queue_index = tfile->queue_index;
 
 	if (!rxhash)
 		return;
@@ -308,7 +309,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
 
 	rcu_read_lock();
 
-	if (tun->numqueues == 1)
+	if (tun->numqueues == 1 || rcu_dereference(tfile->detached))
 		goto unlock;
 
 	e = tun_flow_find(head, rxhash);
@@ -384,16 +385,16 @@ static void tun_set_real_num_queues(struct tun_struct *tun)
 
 static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile)
 {
-	tfile->detached = tun;
+	rcu_assign_pointer(tfile->detached, tun);
 	list_add_tail(&tfile->next, &tun->disabled);
 	++tun->numdisabled;
 }
 
 static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 {
-	struct tun_struct *tun = tfile->detached;
+	struct tun_struct *tun = rtnl_dereference(tfile->detached);
 
-	tfile->detached = NULL;
+	rcu_assign_pointer(tfile->detached, NULL);
 	list_del_init(&tfile->next);
 	--tun->numdisabled;
 	return tun;
@@ -402,26 +403,27 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 static void __tun_detach(struct tun_file *tfile, bool clean)
 {
 	struct tun_file *ntfile;
-	struct tun_struct *tun;
+	struct tun_struct *tun, *detached;
 	struct net_device *dev;
 
 	tun = rtnl_dereference(tfile->tun);
+	detached = rtnl_dereference(tfile->detached);
 
-	if (tun) {
+	if (tun && !detached) {
 		u16 index = tfile->queue_index;
 		BUG_ON(index >= tun->numqueues);
 		dev = tun->dev;
 
 		rcu_assign_pointer(tun->tfiles[index],
 				   tun->tfiles[tun->numqueues - 1]);
-		rcu_assign_pointer(tfile->tun, NULL);
 		ntfile = rtnl_dereference(tun->tfiles[index]);
 		ntfile->queue_index = index;
 
 		--tun->numqueues;
-		if (clean)
+		if (clean) {
+			rcu_assign_pointer(tfile->tun, NULL);
 			sock_put(&tfile->sk);
-		else
+		} else
 			tun_disable_queue(tun, tfile);
 
 		synchronize_net();
@@ -429,7 +431,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 		/* Drop read queue */
 		skb_queue_purge(&tfile->sk.sk_receive_queue);
 		tun_set_real_num_queues(tun);
-	} else if (tfile->detached && clean) {
+	} else if (detached && clean) {
 		tun = tun_enable_queue(tfile);
 		sock_put(&tfile->sk);
 	}
@@ -466,6 +468,10 @@ static void tun_detach_all(struct net_device *dev)
 		rcu_assign_pointer(tfile->tun, NULL);
 		--tun->numqueues;
 	}
+	list_for_each_entry(tfile, &tun->disabled, next) {
+		wake_up_all(&tfile->wq.wait);
+		rcu_assign_pointer(tfile->tun, NULL);
+	}
 	BUG_ON(tun->numqueues != 0);
 
 	synchronize_net();
@@ -496,7 +502,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 		goto out;
 
 	err = -EINVAL;
-	if (rtnl_dereference(tfile->tun))
+	if (rtnl_dereference(tfile->tun) && !rtnl_dereference(tfile->detached))
 		goto out;
 
 	err = -EBUSY;
@@ -504,7 +510,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 		goto out;
 
 	err = -E2BIG;
-	if (!tfile->detached &&
+	if (!rtnl_dereference(tfile->detached) &&
 	    tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES)
 		goto out;
 
@@ -521,7 +527,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
 	tun->numqueues++;
 
-	if (tfile->detached)
+	if (rtnl_dereference(tfile->detached))
 		tun_enable_queue(tfile);
 	else
 		sock_hold(&tfile->sk);
@@ -1195,7 +1201,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
 
-	tun_flow_update(tun, rxhash, tfile->queue_index);
+	tun_flow_update(tun, rxhash, tfile);
 	return total_len;
 }
 
@@ -1796,7 +1802,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 	rtnl_lock();
 
 	if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
-		tun = tfile->detached;
+		tun = rtnl_dereference(tfile->detached);
 		if (!tun) {
 			ret = -EINVAL;
 			goto unlock;
@@ -1807,7 +1813,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 		ret = tun_attach(tun, file);
 	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
 		tun = rtnl_dereference(tfile->tun);
-		if (!tun || !(tun->flags & TUN_TAP_MQ))
+		if (!tun || !(tun->flags & TUN_TAP_MQ) ||
+		    rtnl_dereference(tfile->detached))
 			ret = -EINVAL;
 		else
 			__tun_detach(tfile, false);
-- 
1.7.1


Thread overview: 8+ messages
2013-01-16 10:33 [PATCH V5 0/3] handle polling errors in vhost/vhost_net Jason Wang
2013-01-16 10:33 ` [PATCH V5 1/3] vhost_net: correct error handling in vhost_net_set_backend() Jason Wang
2013-01-16 10:59   ` Michael S. Tsirkin
2013-01-16 10:34 ` [PATCH V5 2/3] vhost_net: handle polling errors when setting backend Jason Wang
2013-01-16 11:00   ` Michael S. Tsirkin
2013-01-16 10:34 ` Jason Wang [this message]
2013-01-16 10:59   ` [PATCH V5 3/3] tuntap: allow polling/writing/reading when detached Michael S. Tsirkin
2013-01-16 13:35     ` Jason Wang
