linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: davem@davemloft.net, mst@redhat.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: maxk@qti.qualcomm.com, edumazet@google.com, krkumar2@in.ibm.com,
	ernesto.martin@viasat.com, haixiao@juniper.net,
	Jason Wang <jasowang@redhat.com>
Subject: [net-next v5 3/7] tuntap: RCUify dereferencing between tun_struct and tun_file
Date: Thu,  1 Nov 2012 13:45:58 +0800	[thread overview]
Message-ID: <1351748762-3455-4-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1351748762-3455-1-git-send-email-jasowang@redhat.com>

RCU were introduced in this patch to synchronize the dereferences between
tun_struct and tun_file. All tun_{get|put} were replaced with RCU, the
dereference from one to other must be done under rtnl lock or rcu read critical
region.

This is needed for the following patches since the one of the goal of multiqueue
tuntap is to allow adding or removing queues during workload. Without RCU,
control path would hold tx locks when adding or removing queues (which may cause
sme delay) and it's hard to change the number of queues without stopping the net
device. With the help of rcu, there's also no need for tun_file hold an refcnt
to tun_struct.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |   95 ++++++++++++++++++++++++++---------------------------
 1 files changed, 47 insertions(+), 48 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d52ad24..bdbb526 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -115,13 +115,16 @@ struct tap_filter {
  * tap_filter were kept in tun_struct since they were used for filtering for the
  * netdevice not for a specific queue (at least I didn't see the reqirement for
  * this).
+ *
+ * RCU usage:
+ * The tun_file and tun_struct are loosely coupled, the pointer from on to the
+ * other can only be read while rcu_read_lock or rtnl_lock is held.
  */
 struct tun_file {
 	struct sock sk;
 	struct socket socket;
 	struct socket_wq wq;
-	atomic_t count;
-	struct tun_struct *tun;
+	struct tun_struct __rcu *tun;
 	struct net *net;
 	struct fasync_struct *fasync;
 	/* only used for fasnyc */
@@ -133,7 +136,7 @@ struct tun_file {
  * file were attached to a persist device.
  */
 struct tun_struct {
-	struct tun_file		*tfile;
+	struct tun_file	__rcu	*tfile;
 	unsigned int 		flags;
 	kuid_t			owner;
 	kgid_t			group;
@@ -179,13 +182,11 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 		if (!err)
 			goto out;
 	}
-	tfile->tun = tun;
+	rcu_assign_pointer(tfile->tun, tun);
 	tfile->socket.sk->sk_sndbuf = tun->sndbuf;
-	tun->tfile = tfile;
+	rcu_assign_pointer(tun->tfile, tfile);
 	netif_carrier_on(tun->dev);
-	dev_hold(tun->dev);
 	sock_hold(&tfile->sk);
-	atomic_inc(&tfile->count);
 
 out:
 	netif_tx_unlock_bh(tun->dev);
@@ -194,34 +195,29 @@ out:
 
 static void __tun_detach(struct tun_struct *tun)
 {
-	struct tun_file *tfile = tun->tfile;
+	struct tun_file *tfile = rcu_dereference_protected(tun->tfile,
+							lockdep_rtnl_is_held());
 	/* Detach from net device */
-	netif_tx_lock_bh(tun->dev);
 	netif_carrier_off(tun->dev);
-	tun->tfile = NULL;
-	tfile->tun = NULL;
-	netif_tx_unlock_bh(tun->dev);
-
-	/* Drop read queue */
-	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
-
-	/* Drop the extra count on the net device */
-	dev_put(tun->dev);
-}
+	rcu_assign_pointer(tun->tfile, NULL);
+	if (tfile) {
+		rcu_assign_pointer(tfile->tun, NULL);
 
-static void tun_detach(struct tun_struct *tun)
-{
-	rtnl_lock();
-	__tun_detach(tun);
-	rtnl_unlock();
+		synchronize_net();
+		/* Drop read queue */
+		skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
+	}
 }
 
 static struct tun_struct *__tun_get(struct tun_file *tfile)
 {
-	struct tun_struct *tun = NULL;
+	struct tun_struct *tun;
 
-	if (atomic_inc_not_zero(&tfile->count))
-		tun = tfile->tun;
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (tun)
+		dev_hold(tun->dev);
+	rcu_read_unlock();
 
 	return tun;
 }
@@ -233,10 +229,7 @@ static struct tun_struct *tun_get(struct file *file)
 
 static void tun_put(struct tun_struct *tun)
 {
-	struct tun_file *tfile = tun->tfile;
-
-	if (atomic_dec_and_test(&tfile->count))
-		tun_detach(tfile->tun);
+	dev_put(tun->dev);
 }
 
 /* TAP filtering */
@@ -357,14 +350,15 @@ static const struct ethtool_ops tun_ethtool_ops;
 static void tun_net_uninit(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_file *tfile = tun->tfile;
+	struct tun_file *tfile = rcu_dereference_protected(tun->tfile,
+							lockdep_rtnl_is_held());
 
 	/* Inform the methods they need to stop using the dev.
 	 */
 	if (tfile) {
 		wake_up_all(&tfile->wq.wait);
-		if (atomic_dec_and_test(&tfile->count))
-			__tun_detach(tun);
+		__tun_detach(tun);
+		synchronize_net();
 	}
 }
 
@@ -386,14 +380,16 @@ static int tun_net_close(struct net_device *dev)
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_file *tfile = tun->tfile;
-
-	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
+	struct tun_file *tfile;
 
+	rcu_read_lock();
+	tfile = rcu_dereference(tun->tfile);
 	/* Drop packet if interface is not attached */
 	if (!tfile)
 		goto drop;
 
+	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
+
 	/* Drop if the filter does not like it.
 	 * This is a noop if the filter is disabled.
 	 * Filter can be enabled only for the TAP devices. */
@@ -435,11 +431,14 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
 	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
 				   POLLRDNORM | POLLRDBAND);
+
+	rcu_read_unlock();
 	return NETDEV_TX_OK;
 
 drop:
 	dev->stats.tx_dropped++;
 	kfree_skb(skb);
+	rcu_read_unlock();
 	return NETDEV_TX_OK;
 }
 
@@ -1089,7 +1088,6 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (!tun)
 		return -EBADFD;
-
 	ret = tun_get_user(tun, tfile, m->msg_control, m->msg_iov, total_len,
 			   m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
 	tun_put(tun);
@@ -1662,8 +1660,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 					    &tun_proto);
 	if (!tfile)
 		return -ENOMEM;
-	atomic_set(&tfile->count, 0);
-	tfile->tun = NULL;
+	rcu_assign_pointer(tfile->tun, NULL);
 	tfile->net = get_net(current->nsproxy->net_ns);
 	tfile->flags = 0;
 
@@ -1691,7 +1688,9 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 	struct tun_struct *tun;
 	struct net *net = tfile->net;
 
-	tun = __tun_get(tfile);
+	rtnl_lock();
+
+	tun = rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held());
 	if (tun) {
 		struct net_device *dev = tun->dev;
 
@@ -1699,18 +1698,20 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 
 		__tun_detach(tun);
 
+		synchronize_net();
+
 		/* If desirable, unregister the netdevice. */
 		if (!(tun->flags & TUN_PERSIST)) {
-			rtnl_lock();
 			if (dev->reg_state == NETREG_REGISTERED)
 				unregister_netdevice(dev);
-			rtnl_unlock();
 		}
 
 		/* drop the reference that netdevice holds */
 		sock_put(&tfile->sk);
 	}
 
+	rtnl_unlock();
+
 	/* drop the reference that file holds */
 	BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
 			 &tfile->socket.flags));
@@ -1842,14 +1843,12 @@ static void tun_cleanup(void)
  * holding a reference to the file for as long as the socket is in use. */
 struct socket *tun_get_socket(struct file *file)
 {
-	struct tun_struct *tun;
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile;
 	if (file->f_op != &tun_fops)
 		return ERR_PTR(-EINVAL);
-	tun = tun_get(file);
-	if (!tun)
+	tfile = file->private_data;
+	if (!tfile)
 		return ERR_PTR(-EBADFD);
-	tun_put(tun);
 	return &tfile->socket;
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
-- 
1.7.1


  parent reply	other threads:[~2012-11-01  5:54 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-01  5:45 [net-next v5 0/7] Multiqueue support in tuntap Jason Wang
2012-11-01  5:45 ` [net-next v5 1/7] tuntap: log the unsigned informaiton with %u Jason Wang
2012-11-01  5:45 ` [net-next v5 2/7] tuntap: move socket to tun_file Jason Wang
2012-11-01  5:45 ` Jason Wang [this message]
2012-11-01  5:45 ` [net-next v5 4/7] tuntap: introduce multiqueue flags Jason Wang
2012-11-01  5:46 ` [net-next v5 5/7] tuntap: multiqueue support Jason Wang
2012-11-01  5:46 ` [net-next v5 6/7] tuntap: add ioctl to attach or detach a file form tuntap device Jason Wang
2012-11-12 15:11   ` Michael S. Tsirkin
2012-11-01  5:46 ` [net-next v5 7/7] tuntap: choose the txq based on rxq Jason Wang
2012-11-01 15:14 ` [net-next v5 0/7] Multiqueue support in tuntap David Miller
2012-11-02  2:18 ` Max Krasnyansky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1351748762-3455-4-git-send-email-jasowang@redhat.com \
    --to=jasowang@redhat.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=ernesto.martin@viasat.com \
    --cc=haixiao@juniper.net \
    --cc=krkumar2@in.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maxk@qti.qualcomm.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).