netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] net, tun: remove the flow cache
@ 2013-12-17  7:26 Zhi Yong Wu
  2013-12-17  7:47 ` Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Zhi Yong Wu @ 2013-12-17  7:26 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, davem, mst, jasowang, Zhi Yong Wu

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

The flow cache is an extremely broken concept, and it usually brings up
growth issues and DoS attacks, so this patch is trying to remove it from
the tuntap driver, and instead use a simpler way for its flow control.

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
 drivers/net/tun.c |  208 +++--------------------------------------------------
 1 files changed, 10 insertions(+), 198 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7c8343a..7c27fdc 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -32,12 +32,15 @@
  *
  *  Daniel Podlejski <underley@underley.eu.org>
  *    Modifications for 2.3.99-pre5 kernel.
+ *
+ *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+ *    Remove the flow cache.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #define DRV_NAME	"tun"
-#define DRV_VERSION	"1.6"
+#define DRV_VERSION	"1.7"
 #define DRV_DESCRIPTION	"Universal TUN/TAP device driver"
 #define DRV_COPYRIGHT	"(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
 
@@ -146,18 +149,6 @@ struct tun_file {
 	struct tun_struct *detached;
 };
 
-struct tun_flow_entry {
-	struct hlist_node hash_link;
-	struct rcu_head rcu;
-	struct tun_struct *tun;
-
-	u32 rxhash;
-	int queue_index;
-	unsigned long updated;
-};
-
-#define TUN_NUM_FLOW_ENTRIES 1024
-
 /* Since the socket were moved to tun_file, to preserve the behavior of persist
  * device, socket filter, sndbuf and vnet header size were restore when the
  * file were attached to a persist device.
@@ -184,163 +175,11 @@ struct tun_struct {
 	int debug;
 #endif
 	spinlock_t lock;
-	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
-	struct timer_list flow_gc_timer;
-	unsigned long ageing_time;
 	unsigned int numdisabled;
 	struct list_head disabled;
 	void *security;
-	u32 flow_count;
 };
 
-static inline u32 tun_hashfn(u32 rxhash)
-{
-	return rxhash & 0x3ff;
-}
-
-static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
-{
-	struct tun_flow_entry *e;
-
-	hlist_for_each_entry_rcu(e, head, hash_link) {
-		if (e->rxhash == rxhash)
-			return e;
-	}
-	return NULL;
-}
-
-static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
-					      struct hlist_head *head,
-					      u32 rxhash, u16 queue_index)
-{
-	struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
-
-	if (e) {
-		tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
-			  rxhash, queue_index);
-		e->updated = jiffies;
-		e->rxhash = rxhash;
-		e->queue_index = queue_index;
-		e->tun = tun;
-		hlist_add_head_rcu(&e->hash_link, head);
-		++tun->flow_count;
-	}
-	return e;
-}
-
-static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
-{
-	tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
-		  e->rxhash, e->queue_index);
-	hlist_del_rcu(&e->hash_link);
-	kfree_rcu(e, rcu);
-	--tun->flow_count;
-}
-
-static void tun_flow_flush(struct tun_struct *tun)
-{
-	int i;
-
-	spin_lock_bh(&tun->lock);
-	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
-		struct tun_flow_entry *e;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
-			tun_flow_delete(tun, e);
-	}
-	spin_unlock_bh(&tun->lock);
-}
-
-static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
-{
-	int i;
-
-	spin_lock_bh(&tun->lock);
-	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
-		struct tun_flow_entry *e;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
-			if (e->queue_index == queue_index)
-				tun_flow_delete(tun, e);
-		}
-	}
-	spin_unlock_bh(&tun->lock);
-}
-
-static void tun_flow_cleanup(unsigned long data)
-{
-	struct tun_struct *tun = (struct tun_struct *)data;
-	unsigned long delay = tun->ageing_time;
-	unsigned long next_timer = jiffies + delay;
-	unsigned long count = 0;
-	int i;
-
-	tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
-
-	spin_lock_bh(&tun->lock);
-	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
-		struct tun_flow_entry *e;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
-			unsigned long this_timer;
-			count++;
-			this_timer = e->updated + delay;
-			if (time_before_eq(this_timer, jiffies))
-				tun_flow_delete(tun, e);
-			else if (time_before(this_timer, next_timer))
-				next_timer = this_timer;
-		}
-	}
-
-	if (count)
-		mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
-	spin_unlock_bh(&tun->lock);
-}
-
-static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
-			    struct tun_file *tfile)
-{
-	struct hlist_head *head;
-	struct tun_flow_entry *e;
-	unsigned long delay = tun->ageing_time;
-	u16 queue_index = tfile->queue_index;
-
-	if (!rxhash)
-		return;
-	else
-		head = &tun->flows[tun_hashfn(rxhash)];
-
-	rcu_read_lock();
-
-	/* We may get a very small possibility of OOO during switching, not
-	 * worth to optimize.*/
-	if (tun->numqueues == 1 || tfile->detached)
-		goto unlock;
-
-	e = tun_flow_find(head, rxhash);
-	if (likely(e)) {
-		/* TODO: keep queueing to old queue until it's empty? */
-		e->queue_index = queue_index;
-		e->updated = jiffies;
-	} else {
-		spin_lock_bh(&tun->lock);
-		if (!tun_flow_find(head, rxhash) &&
-		    tun->flow_count < MAX_TAP_FLOWS)
-			tun_flow_create(tun, head, rxhash, queue_index);
-
-		if (!timer_pending(&tun->flow_gc_timer))
-			mod_timer(&tun->flow_gc_timer,
-				  round_jiffies_up(jiffies + delay));
-		spin_unlock_bh(&tun->lock);
-	}
-
-unlock:
-	rcu_read_unlock();
-}
-
 /* We try to identify a flow through its rxhash first. The reason that
  * we do not check rxq no. is becuase some cards(e.g 82599), chooses
  * the rxq based on the txq where the last packet of the flow comes. As
@@ -351,7 +190,6 @@ unlock:
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_flow_entry *e;
 	u32 txq = 0;
 	u32 numqueues = 0;
 
@@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
 
 	txq = skb_get_rxhash(skb);
 	if (txq) {
-		e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
-		if (e)
-			txq = e->queue_index;
-		else
+		txq = skb_get_queue_mapping(skb);
+		if (!txq) {
 			/* use multiply and shift instead of expensive divide */
 			txq = ((u64)txq * numqueues) >> 32;
+		}
 	} else if (likely(skb_rx_queue_recorded(skb))) {
 		txq = skb_get_rx_queue(skb);
 		while (unlikely(txq >= numqueues))
@@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 			tun_disable_queue(tun, tfile);
 
 		synchronize_net();
-		tun_flow_delete_by_queue(tun, tun->numqueues + 1);
 		/* Drop read queue */
 		tun_queue_purge(tfile);
 		tun_set_real_num_queues(tun);
@@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
 #endif
 };
 
-static void tun_flow_init(struct tun_struct *tun)
-{
-	int i;
-
-	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
-		INIT_HLIST_HEAD(&tun->flows[i]);
-
-	tun->ageing_time = TUN_FLOW_EXPIRE;
-	setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
-	mod_timer(&tun->flow_gc_timer,
-		  round_jiffies_up(jiffies + tun->ageing_time));
-}
-
-static void tun_flow_uninit(struct tun_struct *tun)
-{
-	del_timer_sync(&tun->flow_gc_timer);
-	tun_flow_flush(tun);
-}
-
 /* Initialize net device. */
 static void tun_net_init(struct net_device *dev)
 {
@@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	int copylen;
 	bool zerocopy = false;
 	int err;
-	u32 rxhash;
 
 	if (!(tun->flags & TUN_NO_PI)) {
 		if (len < sizeof(pi))
@@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	skb_reset_network_header(skb);
 	skb_probe_transport_header(skb, 0);
 
-	rxhash = skb_get_rxhash(skb);
+	skb_set_queue_mapping(skb, tfile->queue_index);
 	netif_rx_ni(skb);
 
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
 
-	tun_flow_update(tun, rxhash, tfile);
 	return total_len;
 }
 
@@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
 	struct tun_struct *tun = netdev_priv(dev);
 
 	BUG_ON(!(list_empty(&tun->disabled)));
-	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
 	free_netdev(dev);
 }
@@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			goto err_free_dev;
 
 		tun_net_init(dev);
-		tun_flow_init(tun);
 
 		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
 				   TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
@@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		INIT_LIST_HEAD(&tun->disabled);
 		err = tun_attach(tun, file, false);
 		if (err < 0)
-			goto err_free_flow;
+			goto err_free_security;
 
 		err = register_netdevice(tun->dev);
 		if (err < 0)
@@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 err_detach:
 	tun_detach_all(dev);
-err_free_flow:
-	tun_flow_uninit(tun);
+err_free_security:
 	security_tun_dev_free_security(tun->security);
 err_free_dev:
 	free_netdev(dev);
-- 
1.7.6.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17  7:26 [RFC PATCH] net, tun: remove the flow cache Zhi Yong Wu
@ 2013-12-17  7:47 ` Stephen Hemminger
  2013-12-17  8:13   ` Zhi Yong Wu
  2013-12-17  8:49 ` Jason Wang
  2013-12-18  4:06 ` Tom Herbert
  2 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2013-12-17  7:47 UTC (permalink / raw)
  To: Zhi Yong Wu; +Cc: netdev, linux-kernel, davem, mst, jasowang, Zhi Yong Wu

On Tue, 17 Dec 2013 15:26:22 +0800
Zhi Yong Wu <zwu.kernel@gmail.com> wrote:

> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> 
> The flow cache is an extremely broken concept, and it usually brings up
> growth issues and DoS attacks, so this patch is trying to remove it from
> the tuntap driver, and instead use a simpler way for its flow control.
> 
> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> ---
>  drivers/net/tun.c |  208 +++--------------------------------------------------
>  1 files changed, 10 insertions(+), 198 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 7c8343a..7c27fdc 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -32,12 +32,15 @@
>   *
>   *  Daniel Podlejski <underley@underley.eu.org>
>   *    Modifications for 2.3.99-pre5 kernel.
> + *
> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> + *    Remove the flow cache.
>   */

I agree with your patch, but please don't add to the comment changelog.
These are all historical. The kernel development process has not used
them for 5+ years.

Can we get kernel janitors to just remove them, or would that step
on too many early developers toes by removing credit?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17  7:47 ` Stephen Hemminger
@ 2013-12-17  8:13   ` Zhi Yong Wu
  0 siblings, 0 replies; 12+ messages in thread
From: Zhi Yong Wu @ 2013-12-17  8:13 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Zhi Yong Wu, netdev, linux-kernel, davem, mst, jasowang

On Mon, 2013-12-16 at 23:47 -0800, Stephen Hemminger wrote:
> On Tue, 17 Dec 2013 15:26:22 +0800
> Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
> 
> > From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> > 
> > The flow cache is an extremely broken concept, and it usually brings up
> > growth issues and DoS attacks, so this patch is trying to remove it from
> > the tuntap driver, and instead use a simpler way for its flow control.
> > 
> > Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> > ---
> >  drivers/net/tun.c |  208 +++--------------------------------------------------
> >  1 files changed, 10 insertions(+), 198 deletions(-)
> > 
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 7c8343a..7c27fdc 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -32,12 +32,15 @@
> >   *
> >   *  Daniel Podlejski <underley@underley.eu.org>
> >   *    Modifications for 2.3.99-pre5 kernel.
> > + *
> > + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> > + *    Remove the flow cache.
> >   */
> 
> I agree with your patch, but please don't add to the comment changelog.
> These are all historical. The kernel development process has not used
> them for 5+ years.
> 
> Can we get kernel janitors to just remove them, or would that step
> on too many early developers toes by removing credit?
I thought that this is a big code change, and needed to add some changelog
for it, but you seem to disagree strongly. :) I don't object to
removing my comment from its changelog if other guys also agree with you.


> 

-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17  7:26 [RFC PATCH] net, tun: remove the flow cache Zhi Yong Wu
  2013-12-17  7:47 ` Stephen Hemminger
@ 2013-12-17  8:49 ` Jason Wang
  2013-12-17  9:13   ` Zhi Yong Wu
  2013-12-18  4:06 ` Tom Herbert
  2 siblings, 1 reply; 12+ messages in thread
From: Jason Wang @ 2013-12-17  8:49 UTC (permalink / raw)
  To: Zhi Yong Wu, netdev; +Cc: linux-kernel, davem, mst, Zhi Yong Wu

On 12/17/2013 03:26 PM, Zhi Yong Wu wrote:
> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>
> The flow cache is an extremely broken concept, and it usually brings up
> growth issues and DoS attacks, so this patch is trying to remove it from
> the tuntap driver, and instead use a simpler way for its flow control.

NACK.

This single function revert does not make sense to me. Since:

- You in fact removes the flow steering function in tun. We definitely
need something like this to unbreak the TCP performance in a multiqueue
guest. Please have a look at the virtio-net driver / virtio spec for
more information.
- The total number of flow caches were limited to 4096, so there's no
DoS or growth issue.
- The only issue is scalability, but fixing this is not easy. We can
just use arrays/indirection table like RSS instead of hash buckets, it
saves some time in linear search but has other issues like collision
- I've also had a RFC of using aRFS in the past, it also has several
drawbacks such as busy looping in the networking hotspot.

So in conclusion, we need flow steering in tun, just removing current
method does not help. The proper way is to expose several different
methods to user and let user to choose the preferable mechanism like
packet.

>
> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> ---
>  drivers/net/tun.c |  208 +++--------------------------------------------------
>  1 files changed, 10 insertions(+), 198 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 7c8343a..7c27fdc 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -32,12 +32,15 @@
>   *
>   *  Daniel Podlejski <underley@underley.eu.org>
>   *    Modifications for 2.3.99-pre5 kernel.
> + *
> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> + *    Remove the flow cache.
>   */
>  
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
>  #define DRV_NAME	"tun"
> -#define DRV_VERSION	"1.6"
> +#define DRV_VERSION	"1.7"
>  #define DRV_DESCRIPTION	"Universal TUN/TAP device driver"
>  #define DRV_COPYRIGHT	"(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
>  
> @@ -146,18 +149,6 @@ struct tun_file {
>  	struct tun_struct *detached;
>  };
>  
> -struct tun_flow_entry {
> -	struct hlist_node hash_link;
> -	struct rcu_head rcu;
> -	struct tun_struct *tun;
> -
> -	u32 rxhash;
> -	int queue_index;
> -	unsigned long updated;
> -};
> -
> -#define TUN_NUM_FLOW_ENTRIES 1024
> -
>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>   * device, socket filter, sndbuf and vnet header size were restore when the
>   * file were attached to a persist device.
> @@ -184,163 +175,11 @@ struct tun_struct {
>  	int debug;
>  #endif
>  	spinlock_t lock;
> -	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
> -	struct timer_list flow_gc_timer;
> -	unsigned long ageing_time;
>  	unsigned int numdisabled;
>  	struct list_head disabled;
>  	void *security;
> -	u32 flow_count;
>  };
>  
> -static inline u32 tun_hashfn(u32 rxhash)
> -{
> -	return rxhash & 0x3ff;
> -}
> -
> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
> -{
> -	struct tun_flow_entry *e;
> -
> -	hlist_for_each_entry_rcu(e, head, hash_link) {
> -		if (e->rxhash == rxhash)
> -			return e;
> -	}
> -	return NULL;
> -}
> -
> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
> -					      struct hlist_head *head,
> -					      u32 rxhash, u16 queue_index)
> -{
> -	struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
> -
> -	if (e) {
> -		tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
> -			  rxhash, queue_index);
> -		e->updated = jiffies;
> -		e->rxhash = rxhash;
> -		e->queue_index = queue_index;
> -		e->tun = tun;
> -		hlist_add_head_rcu(&e->hash_link, head);
> -		++tun->flow_count;
> -	}
> -	return e;
> -}
> -
> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
> -{
> -	tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
> -		  e->rxhash, e->queue_index);
> -	hlist_del_rcu(&e->hash_link);
> -	kfree_rcu(e, rcu);
> -	--tun->flow_count;
> -}
> -
> -static void tun_flow_flush(struct tun_struct *tun)
> -{
> -	int i;
> -
> -	spin_lock_bh(&tun->lock);
> -	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
> -		struct tun_flow_entry *e;
> -		struct hlist_node *n;
> -
> -		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
> -			tun_flow_delete(tun, e);
> -	}
> -	spin_unlock_bh(&tun->lock);
> -}
> -
> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
> -{
> -	int i;
> -
> -	spin_lock_bh(&tun->lock);
> -	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
> -		struct tun_flow_entry *e;
> -		struct hlist_node *n;
> -
> -		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
> -			if (e->queue_index == queue_index)
> -				tun_flow_delete(tun, e);
> -		}
> -	}
> -	spin_unlock_bh(&tun->lock);
> -}
> -
> -static void tun_flow_cleanup(unsigned long data)
> -{
> -	struct tun_struct *tun = (struct tun_struct *)data;
> -	unsigned long delay = tun->ageing_time;
> -	unsigned long next_timer = jiffies + delay;
> -	unsigned long count = 0;
> -	int i;
> -
> -	tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
> -
> -	spin_lock_bh(&tun->lock);
> -	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
> -		struct tun_flow_entry *e;
> -		struct hlist_node *n;
> -
> -		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
> -			unsigned long this_timer;
> -			count++;
> -			this_timer = e->updated + delay;
> -			if (time_before_eq(this_timer, jiffies))
> -				tun_flow_delete(tun, e);
> -			else if (time_before(this_timer, next_timer))
> -				next_timer = this_timer;
> -		}
> -	}
> -
> -	if (count)
> -		mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
> -	spin_unlock_bh(&tun->lock);
> -}
> -
> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
> -			    struct tun_file *tfile)
> -{
> -	struct hlist_head *head;
> -	struct tun_flow_entry *e;
> -	unsigned long delay = tun->ageing_time;
> -	u16 queue_index = tfile->queue_index;
> -
> -	if (!rxhash)
> -		return;
> -	else
> -		head = &tun->flows[tun_hashfn(rxhash)];
> -
> -	rcu_read_lock();
> -
> -	/* We may get a very small possibility of OOO during switching, not
> -	 * worth to optimize.*/
> -	if (tun->numqueues == 1 || tfile->detached)
> -		goto unlock;
> -
> -	e = tun_flow_find(head, rxhash);
> -	if (likely(e)) {
> -		/* TODO: keep queueing to old queue until it's empty? */
> -		e->queue_index = queue_index;
> -		e->updated = jiffies;
> -	} else {
> -		spin_lock_bh(&tun->lock);
> -		if (!tun_flow_find(head, rxhash) &&
> -		    tun->flow_count < MAX_TAP_FLOWS)
> -			tun_flow_create(tun, head, rxhash, queue_index);
> -
> -		if (!timer_pending(&tun->flow_gc_timer))
> -			mod_timer(&tun->flow_gc_timer,
> -				  round_jiffies_up(jiffies + delay));
> -		spin_unlock_bh(&tun->lock);
> -	}
> -
> -unlock:
> -	rcu_read_unlock();
> -}
> -
>  /* We try to identify a flow through its rxhash first. The reason that
>   * we do not check rxq no. is becuase some cards(e.g 82599), chooses
>   * the rxq based on the txq where the last packet of the flow comes. As
> @@ -351,7 +190,6 @@ unlock:
>  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>  {
>  	struct tun_struct *tun = netdev_priv(dev);
> -	struct tun_flow_entry *e;
>  	u32 txq = 0;
>  	u32 numqueues = 0;
>  
> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>  
>  	txq = skb_get_rxhash(skb);
>  	if (txq) {
> -		e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
> -		if (e)
> -			txq = e->queue_index;
> -		else
> +		txq = skb_get_queue_mapping(skb);
> +		if (!txq) {
>  			/* use multiply and shift instead of expensive divide */
>  			txq = ((u64)txq * numqueues) >> 32;
> +		}
>  	} else if (likely(skb_rx_queue_recorded(skb))) {
>  		txq = skb_get_rx_queue(skb);
>  		while (unlikely(txq >= numqueues))
> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
>  			tun_disable_queue(tun, tfile);
>  
>  		synchronize_net();
> -		tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>  		/* Drop read queue */
>  		tun_queue_purge(tfile);
>  		tun_set_real_num_queues(tun);
> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
>  #endif
>  };
>  
> -static void tun_flow_init(struct tun_struct *tun)
> -{
> -	int i;
> -
> -	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
> -		INIT_HLIST_HEAD(&tun->flows[i]);
> -
> -	tun->ageing_time = TUN_FLOW_EXPIRE;
> -	setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
> -	mod_timer(&tun->flow_gc_timer,
> -		  round_jiffies_up(jiffies + tun->ageing_time));
> -}
> -
> -static void tun_flow_uninit(struct tun_struct *tun)
> -{
> -	del_timer_sync(&tun->flow_gc_timer);
> -	tun_flow_flush(tun);
> -}
> -
>  /* Initialize net device. */
>  static void tun_net_init(struct net_device *dev)
>  {
> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>  	int copylen;
>  	bool zerocopy = false;
>  	int err;
> -	u32 rxhash;
>  
>  	if (!(tun->flags & TUN_NO_PI)) {
>  		if (len < sizeof(pi))
> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>  	skb_reset_network_header(skb);
>  	skb_probe_transport_header(skb, 0);
>  
> -	rxhash = skb_get_rxhash(skb);
> +	skb_set_queue_mapping(skb, tfile->queue_index);
>  	netif_rx_ni(skb);
>  
>  	tun->dev->stats.rx_packets++;
>  	tun->dev->stats.rx_bytes += len;
>  
> -	tun_flow_update(tun, rxhash, tfile);
>  	return total_len;
>  }
>  
> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
>  	struct tun_struct *tun = netdev_priv(dev);
>  
>  	BUG_ON(!(list_empty(&tun->disabled)));
> -	tun_flow_uninit(tun);
>  	security_tun_dev_free_security(tun->security);
>  	free_netdev(dev);
>  }
> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  			goto err_free_dev;
>  
>  		tun_net_init(dev);
> -		tun_flow_init(tun);
>  
>  		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>  				   TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  		INIT_LIST_HEAD(&tun->disabled);
>  		err = tun_attach(tun, file, false);
>  		if (err < 0)
> -			goto err_free_flow;
> +			goto err_free_security;
>  
>  		err = register_netdevice(tun->dev);
>  		if (err < 0)
> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  err_detach:
>  	tun_detach_all(dev);
> -err_free_flow:
> -	tun_flow_uninit(tun);
> +err_free_security:
>  	security_tun_dev_free_security(tun->security);
>  err_free_dev:
>  	free_netdev(dev);

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17  8:49 ` Jason Wang
@ 2013-12-17  9:13   ` Zhi Yong Wu
  2013-12-17 10:05     ` Jason Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Zhi Yong Wu @ 2013-12-17  9:13 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel mlist, David S. Miller, Michael S. Tsirkin,
	Zhi Yong Wu

On Tue, Dec 17, 2013 at 4:49 PM, Jason Wang <jasowang@redhat.com> wrote:
> On 12/17/2013 03:26 PM, Zhi Yong Wu wrote:
>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>> The flow cache is an extremely broken concept, and it usually brings up
>> growth issues and DoS attacks, so this patch is trying to remove it from
>> the tuntap driver, and instead use a simpler way for its flow control.
>
> NACK.
>
> This single function revert does not make sense to me. Since:
IIRC, the tuntap flow cache is only used to save the mapping of skb
packet <-> queue index. My idea only saves the queue index in skb_buff
early when the skb buffer is filled, not in a flow cache as the current
code does. This method is actually simpler and completely doesn't need
any flow cache.

>
> - You in fact removes the flow steering function in tun. We definitely
> need something like this to unbreak the TCP performance in a multiqueue
I don't think it will downgrade the TCP perf even in mq guest, but my
idea maybe has better TCP perf, because it doesn't have any cache
table lookup, etc.
> guest. Please have a look at the virtio-net driver / virtio sepc for
> more information.
> - The total number of flow caches were limited to 4096, so there's no
> DoS or growth issue.
Can you check why the ipv4 routing cache is removed? maybe i miss
something, if yes, pls correct me. :)
> - The only issue is scalability, but fixing this is not easy. We can
> just use arrays/indirection table like RSS instead of hash buckets, it
> saves some time in linear search but has other issues like collision
> - I've also had a RFC of using aRFS in the past, it also has several
> drawbacks such as busy looping in the networking hotspot.
>
> So in conclusion, we need flow steering in tun, just removing current
> method does not help. The proper way is to expose several different
> methods to user and let user to choose the preferable mechanism like
> packet.
By the way, let us look at what other networking guys think of this,
such as MST, dave, etc. :)

>
>>
>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> ---
>>  drivers/net/tun.c |  208 +++--------------------------------------------------
>>  1 files changed, 10 insertions(+), 198 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 7c8343a..7c27fdc 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -32,12 +32,15 @@
>>   *
>>   *  Daniel Podlejski <underley@underley.eu.org>
>>   *    Modifications for 2.3.99-pre5 kernel.
>> + *
>> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> + *    Remove the flow cache.
>>   */
>>
>>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>>
>>  #define DRV_NAME     "tun"
>> -#define DRV_VERSION  "1.6"
>> +#define DRV_VERSION  "1.7"
>>  #define DRV_DESCRIPTION      "Universal TUN/TAP device driver"
>>  #define DRV_COPYRIGHT        "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
>>
>> @@ -146,18 +149,6 @@ struct tun_file {
>>       struct tun_struct *detached;
>>  };
>>
>> -struct tun_flow_entry {
>> -     struct hlist_node hash_link;
>> -     struct rcu_head rcu;
>> -     struct tun_struct *tun;
>> -
>> -     u32 rxhash;
>> -     int queue_index;
>> -     unsigned long updated;
>> -};
>> -
>> -#define TUN_NUM_FLOW_ENTRIES 1024
>> -
>>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>>   * device, socket filter, sndbuf and vnet header size were restore when the
>>   * file were attached to a persist device.
>> @@ -184,163 +175,11 @@ struct tun_struct {
>>       int debug;
>>  #endif
>>       spinlock_t lock;
>> -     struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>> -     struct timer_list flow_gc_timer;
>> -     unsigned long ageing_time;
>>       unsigned int numdisabled;
>>       struct list_head disabled;
>>       void *security;
>> -     u32 flow_count;
>>  };
>>
>> -static inline u32 tun_hashfn(u32 rxhash)
>> -{
>> -     return rxhash & 0x3ff;
>> -}
>> -
>> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
>> -{
>> -     struct tun_flow_entry *e;
>> -
>> -     hlist_for_each_entry_rcu(e, head, hash_link) {
>> -             if (e->rxhash == rxhash)
>> -                     return e;
>> -     }
>> -     return NULL;
>> -}
>> -
>> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
>> -                                           struct hlist_head *head,
>> -                                           u32 rxhash, u16 queue_index)
>> -{
>> -     struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
>> -
>> -     if (e) {
>> -             tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
>> -                       rxhash, queue_index);
>> -             e->updated = jiffies;
>> -             e->rxhash = rxhash;
>> -             e->queue_index = queue_index;
>> -             e->tun = tun;
>> -             hlist_add_head_rcu(&e->hash_link, head);
>> -             ++tun->flow_count;
>> -     }
>> -     return e;
>> -}
>> -
>> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
>> -{
>> -     tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
>> -               e->rxhash, e->queue_index);
>> -     hlist_del_rcu(&e->hash_link);
>> -     kfree_rcu(e, rcu);
>> -     --tun->flow_count;
>> -}
>> -
>> -static void tun_flow_flush(struct tun_struct *tun)
>> -{
>> -     int i;
>> -
>> -     spin_lock_bh(&tun->lock);
>> -     for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>> -             struct tun_flow_entry *e;
>> -             struct hlist_node *n;
>> -
>> -             hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
>> -                     tun_flow_delete(tun, e);
>> -     }
>> -     spin_unlock_bh(&tun->lock);
>> -}
>> -
>> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
>> -{
>> -     int i;
>> -
>> -     spin_lock_bh(&tun->lock);
>> -     for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>> -             struct tun_flow_entry *e;
>> -             struct hlist_node *n;
>> -
>> -             hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>> -                     if (e->queue_index == queue_index)
>> -                             tun_flow_delete(tun, e);
>> -             }
>> -     }
>> -     spin_unlock_bh(&tun->lock);
>> -}
>> -
>> -static void tun_flow_cleanup(unsigned long data)
>> -{
>> -     struct tun_struct *tun = (struct tun_struct *)data;
>> -     unsigned long delay = tun->ageing_time;
>> -     unsigned long next_timer = jiffies + delay;
>> -     unsigned long count = 0;
>> -     int i;
>> -
>> -     tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
>> -
>> -     spin_lock_bh(&tun->lock);
>> -     for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>> -             struct tun_flow_entry *e;
>> -             struct hlist_node *n;
>> -
>> -             hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>> -                     unsigned long this_timer;
>> -                     count++;
>> -                     this_timer = e->updated + delay;
>> -                     if (time_before_eq(this_timer, jiffies))
>> -                             tun_flow_delete(tun, e);
>> -                     else if (time_before(this_timer, next_timer))
>> -                             next_timer = this_timer;
>> -             }
>> -     }
>> -
>> -     if (count)
>> -             mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
>> -     spin_unlock_bh(&tun->lock);
>> -}
>> -
>> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
>> -                         struct tun_file *tfile)
>> -{
>> -     struct hlist_head *head;
>> -     struct tun_flow_entry *e;
>> -     unsigned long delay = tun->ageing_time;
>> -     u16 queue_index = tfile->queue_index;
>> -
>> -     if (!rxhash)
>> -             return;
>> -     else
>> -             head = &tun->flows[tun_hashfn(rxhash)];
>> -
>> -     rcu_read_lock();
>> -
>> -     /* We may get a very small possibility of OOO during switching, not
>> -      * worth to optimize.*/
>> -     if (tun->numqueues == 1 || tfile->detached)
>> -             goto unlock;
>> -
>> -     e = tun_flow_find(head, rxhash);
>> -     if (likely(e)) {
>> -             /* TODO: keep queueing to old queue until it's empty? */
>> -             e->queue_index = queue_index;
>> -             e->updated = jiffies;
>> -     } else {
>> -             spin_lock_bh(&tun->lock);
>> -             if (!tun_flow_find(head, rxhash) &&
>> -                 tun->flow_count < MAX_TAP_FLOWS)
>> -                     tun_flow_create(tun, head, rxhash, queue_index);
>> -
>> -             if (!timer_pending(&tun->flow_gc_timer))
>> -                     mod_timer(&tun->flow_gc_timer,
>> -                               round_jiffies_up(jiffies + delay));
>> -             spin_unlock_bh(&tun->lock);
>> -     }
>> -
>> -unlock:
>> -     rcu_read_unlock();
>> -}
>> -
>>  /* We try to identify a flow through its rxhash first. The reason that
>>   * we do not check rxq no. is becuase some cards(e.g 82599), chooses
>>   * the rxq based on the txq where the last packet of the flow comes. As
>> @@ -351,7 +190,6 @@ unlock:
>>  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>  {
>>       struct tun_struct *tun = netdev_priv(dev);
>> -     struct tun_flow_entry *e;
>>       u32 txq = 0;
>>       u32 numqueues = 0;
>>
>> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>
>>       txq = skb_get_rxhash(skb);
>>       if (txq) {
>> -             e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>> -             if (e)
>> -                     txq = e->queue_index;
>> -             else
>> +             txq = skb_get_queue_mapping(skb);
>> +             if (!txq) {
>>                       /* use multiply and shift instead of expensive divide */
>>                       txq = ((u64)txq * numqueues) >> 32;
>> +             }
>>       } else if (likely(skb_rx_queue_recorded(skb))) {
>>               txq = skb_get_rx_queue(skb);
>>               while (unlikely(txq >= numqueues))
>> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
>>                       tun_disable_queue(tun, tfile);
>>
>>               synchronize_net();
>> -             tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>>               /* Drop read queue */
>>               tun_queue_purge(tfile);
>>               tun_set_real_num_queues(tun);
>> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
>>  #endif
>>  };
>>
>> -static void tun_flow_init(struct tun_struct *tun)
>> -{
>> -     int i;
>> -
>> -     for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
>> -             INIT_HLIST_HEAD(&tun->flows[i]);
>> -
>> -     tun->ageing_time = TUN_FLOW_EXPIRE;
>> -     setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
>> -     mod_timer(&tun->flow_gc_timer,
>> -               round_jiffies_up(jiffies + tun->ageing_time));
>> -}
>> -
>> -static void tun_flow_uninit(struct tun_struct *tun)
>> -{
>> -     del_timer_sync(&tun->flow_gc_timer);
>> -     tun_flow_flush(tun);
>> -}
>> -
>>  /* Initialize net device. */
>>  static void tun_net_init(struct net_device *dev)
>>  {
>> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>       int copylen;
>>       bool zerocopy = false;
>>       int err;
>> -     u32 rxhash;
>>
>>       if (!(tun->flags & TUN_NO_PI)) {
>>               if (len < sizeof(pi))
>> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>       skb_reset_network_header(skb);
>>       skb_probe_transport_header(skb, 0);
>>
>> -     rxhash = skb_get_rxhash(skb);
>> +     skb_set_queue_mapping(skb, tfile->queue_index);
>>       netif_rx_ni(skb);
>>
>>       tun->dev->stats.rx_packets++;
>>       tun->dev->stats.rx_bytes += len;
>>
>> -     tun_flow_update(tun, rxhash, tfile);
>>       return total_len;
>>  }
>>
>> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
>>       struct tun_struct *tun = netdev_priv(dev);
>>
>>       BUG_ON(!(list_empty(&tun->disabled)));
>> -     tun_flow_uninit(tun);
>>       security_tun_dev_free_security(tun->security);
>>       free_netdev(dev);
>>  }
>> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>                       goto err_free_dev;
>>
>>               tun_net_init(dev);
>> -             tun_flow_init(tun);
>>
>>               dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>>                                  TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
>> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>               INIT_LIST_HEAD(&tun->disabled);
>>               err = tun_attach(tun, file, false);
>>               if (err < 0)
>> -                     goto err_free_flow;
>> +                     goto err_free_security;
>>
>>               err = register_netdevice(tun->dev);
>>               if (err < 0)
>> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>
>>  err_detach:
>>       tun_detach_all(dev);
>> -err_free_flow:
>> -     tun_flow_uninit(tun);
>> +err_free_security:
>>       security_tun_dev_free_security(tun->security);
>>  err_free_dev:
>>       free_netdev(dev);
>



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17  9:13   ` Zhi Yong Wu
@ 2013-12-17 10:05     ` Jason Wang
  2013-12-18  2:08       ` Zhi Yong Wu
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Wang @ 2013-12-17 10:05 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: netdev, linux-kernel mlist, David S. Miller, Michael S. Tsirkin,
	Zhi Yong Wu

On 12/17/2013 05:13 PM, Zhi Yong Wu wrote:
> On Tue, Dec 17, 2013 at 4:49 PM, Jason Wang <jasowang@redhat.com> wrote:
>> > On 12/17/2013 03:26 PM, Zhi Yong Wu wrote:
>>> >> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>> >>
>>> >> The flow cache is an extremely broken concept, and it usually brings up
>>> >> growth issues and DoS attacks, so this patch is trying to remove it from
>>> >> the tuntap driver, and insteadly use a simpler way for its flow control.
>> >
>> > NACK.
>> >
>> > This single function revert does not make sense to me. Since:
> IIRC, the tuntap flow cache is only used to save the mapping of skb
> packet <-> queue index. My idea only save the queue index in skb_buff
> early when skb buffer is filled, not in flow cache as the current
> code. This method is actually more simpler and completely doesn't need
> any flow cache.

Nope. Flow caches record the flow-to-queue mapping like most
multiqueue nics do. The only difference is that tun records it silently while
most nics need the driver to tell them the mapping.

What your patch does is:
- set the queue mapping of skb during tun_get_user(). But most drivers
use XPS or the processor id to select the real tx queue. So the real txq
depends on the cpu that vhost or qemu is running on. This setting does not
have any effect in fact.
- the queue mapping of skb is fetched during tun_select_queue(). This
value is usually set by a multiqueue nic to record which hardware rxq
this packet came from.

Can you explain how your patch works exactly?
>> >
>> > - You in fact removes the flow steering function in tun. We definitely
>> > need something like this to unbreak the TCP performance in a multiqueue
> I don't think it will downgrade the TCP perf even in mq guest, but my
> idea maybe has better TCP perf, because it doesn't have any cache
> table lookup, etc.

Did you test and compare the performance numbers? Did you run profiler
to see how much does the lookup cost?
>> > guest. Please have a look at the virtio-net driver / virtio sepc for
>> > more information.
>> > - The total number of flow caches were limited to 4096, so there's no
>> > DoS or growth issue.
> Can you check why the ipv4 routing cache is removed? maybe i miss
> something, if yes, pls correct me. :)

The main differences is that the flow caches were best effort. Tun can
not store all flows to queue mapping, and even a hardware nic can not do
this. If a packet misses the flow cache, it's safe to distribute it
randomly or through another method. So the limitation just work.

Could you please explain the DoS or growth issue you meet here?
>> > - The only issue is scalability, but fixing this is not easy. We can
>> > just use arrays/indirection table like RSS instead of hash buckets, it
>> > saves some time in linear search but has other issues like collision
>> > - I've also had a RFC of using aRFS in the past, it also has several
>> > drawbacks such as busy looping in the networking hotspot.
>> >
>> > So in conclusion, we need flow steering in tun, just removing current
>> > method does not help. The proper way is to expose several different
>> > methods to user and let user to choose the preferable mechanism like
>> > packet.
> By the way, let us look at what other networking guys think of this,
> such as MST, dave, etc. :)
>

Of course.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17 10:05     ` Jason Wang
@ 2013-12-18  2:08       ` Zhi Yong Wu
  2013-12-23  7:00         ` Jason Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Zhi Yong Wu @ 2013-12-18  2:08 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel mlist, David S. Miller, Michael S. Tsirkin,
	Zhi Yong Wu

On Tue, Dec 17, 2013 at 6:05 PM, Jason Wang <jasowang@redhat.com> wrote:
> On 12/17/2013 05:13 PM, Zhi Yong Wu wrote:
>> On Tue, Dec 17, 2013 at 4:49 PM, Jason Wang <jasowang@redhat.com> wrote:
>>> > On 12/17/2013 03:26 PM, Zhi Yong Wu wrote:
>>>> >> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>> >>
>>>> >> The flow cache is an extremely broken concept, and it usually brings up
>>>> >> growth issues and DoS attacks, so this patch is trying to remove it from
>>>> >> the tuntap driver, and insteadly use a simpler way for its flow control.
>>> >
>>> > NACK.
>>> >
>>> > This single function revert does not make sense to me. Since:
>> IIRC, the tuntap flow cache is only used to save the mapping of skb
>> packet <-> queue index. My idea only save the queue index in skb_buff
>> early when skb buffer is filled, not in flow cache as the current
>> code. This method is actually more simpler and completely doesn't need
>> any flow cache.
>
> Nope. Flow caches record the flow to queues mapping like what most
> multiqueue nic does. The only difference is tun record it silently while
> most nic needs driver to tell the mapping.
I just checked the virtio specs; I seem to have missed the fact that the flow
cache enables packet steering in mq mode — thanks for your comments. But I
have some concerns about some of your comments.
>
> What your patch does is:
> - set the queue mapping of skb during tun_get_user(). But most drivers
> using XPS or processor id to select the real tx queue. So the real txq
> depends on the cpu that vhost or qemu is running. This setting does not
Don't those drivers invoke netdev_pick_tx() or its counterpart to
select the real tx queue? e.g. tun_select_queue(). Or can you illustrate it
with an example?
Moreover, how do those drivers know which cpu vhost or qemu is running on?
> have any effect in fact.
> - the queue mapping of skb were fetched during tun_select_queue(). This
> value is usually set by a multiqueue nic to record which hardware rxq
> was this packet came.
Ah? Can you let me know where a mq nic controller sets it?
>
> Can you explain how your patch works exactly?
You have understood it.
>>> >
>>> > - You in fact removes the flow steering function in tun. We definitely
>>> > need something like this to unbreak the TCP performance in a multiqueue
>> I don't think it will downgrade the TCP perf even in mq guest, but my
>> idea maybe has better TCP perf, because it doesn't have any cache
>> table lookup, etc.
>
> Did you test and compare the performance numbers? Did you run profiler
> to see how much does the lookup cost?
No. As I just said above, I missed that the flow cache can enable packet
steering. But did you do any related perf testing? To be honest, I am
wondering how much perf the packet steering can improve, since it
also adds a lot of cache-lookup cost.
>>> > guest. Please have a look at the virtio-net driver / virtio sepc for
>>> > more information.
>>> > - The total number of flow caches were limited to 4096, so there's no
>>> > DoS or growth issue.
>> Can you check why the ipv4 routing cache is removed? maybe i miss
>> something, if yes, pls correct me. :)
>
> The main differences is that the flow caches were best effort. Tun can
> not store all flows to queue mapping, and even a hardware nic can not do
> this. If a packet misses the flow cache, it's safe to distribute it
> randomly or through another method. So the limitation just work.
Exactly, we can know this from tun_select_queue().
>
> Could you please explain the DoS or growth issue you meet here?
>>> > - The only issue is scalability, but fixing this is not easy. We can
>>> > just use arrays/indirection table like RSS instead of hash buckets, it
>>> > saves some time in linear search but has other issues like collision
>>> > - I've also had a RFC of using aRFS in the past, it also has several
>>> > drawbacks such as busy looping in the networking hotspot.
>>> >
>>> > So in conclusion, we need flow steering in tun, just removing current
>>> > method does not help. The proper way is to expose several different
>>> > methods to user and let user to choose the preferable mechanism like
>>> > packet.
>> By the way, let us look at what other networking guys think of this,
>> such as MST, dave, etc. :)
>>
>
> Of course.



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-17  7:26 [RFC PATCH] net, tun: remove the flow cache Zhi Yong Wu
  2013-12-17  7:47 ` Stephen Hemminger
  2013-12-17  8:49 ` Jason Wang
@ 2013-12-18  4:06 ` Tom Herbert
  2013-12-18  4:37   ` Zhi Yong Wu
  2 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2013-12-18  4:06 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Linux Netdev List, LKML, David Miller, Michael S. Tsirkin,
	Jason Wang, Zhi Yong Wu

On Mon, Dec 16, 2013 at 11:26 PM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>
> The flow cache is an extremely broken concept, and it usually brings up
> growth issues and DoS attacks, so this patch is trying to remove it from
> the tuntap driver, and insteadly use a simpler way for its flow control.
>
Yes, in its current state it's broken. But maybe we can try to fix
it instead of arbitrarily removing it. Please see my patches on
plumbing RFS into tuntap which may start to make it useful.

Tom

> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> ---
>  drivers/net/tun.c |  208 +++--------------------------------------------------
>  1 files changed, 10 insertions(+), 198 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 7c8343a..7c27fdc 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -32,12 +32,15 @@
>   *
>   *  Daniel Podlejski <underley@underley.eu.org>
>   *    Modifications for 2.3.99-pre5 kernel.
> + *
> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> + *    Remove the flow cache.
>   */
>
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
>  #define DRV_NAME       "tun"
> -#define DRV_VERSION    "1.6"
> +#define DRV_VERSION    "1.7"
>  #define DRV_DESCRIPTION        "Universal TUN/TAP device driver"
>  #define DRV_COPYRIGHT  "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
>
> @@ -146,18 +149,6 @@ struct tun_file {
>         struct tun_struct *detached;
>  };
>
> -struct tun_flow_entry {
> -       struct hlist_node hash_link;
> -       struct rcu_head rcu;
> -       struct tun_struct *tun;
> -
> -       u32 rxhash;
> -       int queue_index;
> -       unsigned long updated;
> -};
> -
> -#define TUN_NUM_FLOW_ENTRIES 1024
> -
>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>   * device, socket filter, sndbuf and vnet header size were restore when the
>   * file were attached to a persist device.
> @@ -184,163 +175,11 @@ struct tun_struct {
>         int debug;
>  #endif
>         spinlock_t lock;
> -       struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
> -       struct timer_list flow_gc_timer;
> -       unsigned long ageing_time;
>         unsigned int numdisabled;
>         struct list_head disabled;
>         void *security;
> -       u32 flow_count;
>  };
>
> -static inline u32 tun_hashfn(u32 rxhash)
> -{
> -       return rxhash & 0x3ff;
> -}
> -
> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
> -{
> -       struct tun_flow_entry *e;
> -
> -       hlist_for_each_entry_rcu(e, head, hash_link) {
> -               if (e->rxhash == rxhash)
> -                       return e;
> -       }
> -       return NULL;
> -}
> -
> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
> -                                             struct hlist_head *head,
> -                                             u32 rxhash, u16 queue_index)
> -{
> -       struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
> -
> -       if (e) {
> -               tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
> -                         rxhash, queue_index);
> -               e->updated = jiffies;
> -               e->rxhash = rxhash;
> -               e->queue_index = queue_index;
> -               e->tun = tun;
> -               hlist_add_head_rcu(&e->hash_link, head);
> -               ++tun->flow_count;
> -       }
> -       return e;
> -}
> -
> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
> -{
> -       tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
> -                 e->rxhash, e->queue_index);
> -       hlist_del_rcu(&e->hash_link);
> -       kfree_rcu(e, rcu);
> -       --tun->flow_count;
> -}
> -
> -static void tun_flow_flush(struct tun_struct *tun)
> -{
> -       int i;
> -
> -       spin_lock_bh(&tun->lock);
> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
> -               struct tun_flow_entry *e;
> -               struct hlist_node *n;
> -
> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
> -                       tun_flow_delete(tun, e);
> -       }
> -       spin_unlock_bh(&tun->lock);
> -}
> -
> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
> -{
> -       int i;
> -
> -       spin_lock_bh(&tun->lock);
> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
> -               struct tun_flow_entry *e;
> -               struct hlist_node *n;
> -
> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
> -                       if (e->queue_index == queue_index)
> -                               tun_flow_delete(tun, e);
> -               }
> -       }
> -       spin_unlock_bh(&tun->lock);
> -}
> -
> -static void tun_flow_cleanup(unsigned long data)
> -{
> -       struct tun_struct *tun = (struct tun_struct *)data;
> -       unsigned long delay = tun->ageing_time;
> -       unsigned long next_timer = jiffies + delay;
> -       unsigned long count = 0;
> -       int i;
> -
> -       tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
> -
> -       spin_lock_bh(&tun->lock);
> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
> -               struct tun_flow_entry *e;
> -               struct hlist_node *n;
> -
> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
> -                       unsigned long this_timer;
> -                       count++;
> -                       this_timer = e->updated + delay;
> -                       if (time_before_eq(this_timer, jiffies))
> -                               tun_flow_delete(tun, e);
> -                       else if (time_before(this_timer, next_timer))
> -                               next_timer = this_timer;
> -               }
> -       }
> -
> -       if (count)
> -               mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
> -       spin_unlock_bh(&tun->lock);
> -}
> -
> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
> -                           struct tun_file *tfile)
> -{
> -       struct hlist_head *head;
> -       struct tun_flow_entry *e;
> -       unsigned long delay = tun->ageing_time;
> -       u16 queue_index = tfile->queue_index;
> -
> -       if (!rxhash)
> -               return;
> -       else
> -               head = &tun->flows[tun_hashfn(rxhash)];
> -
> -       rcu_read_lock();
> -
> -       /* We may get a very small possibility of OOO during switching, not
> -        * worth to optimize.*/
> -       if (tun->numqueues == 1 || tfile->detached)
> -               goto unlock;
> -
> -       e = tun_flow_find(head, rxhash);
> -       if (likely(e)) {
> -               /* TODO: keep queueing to old queue until it's empty? */
> -               e->queue_index = queue_index;
> -               e->updated = jiffies;
> -       } else {
> -               spin_lock_bh(&tun->lock);
> -               if (!tun_flow_find(head, rxhash) &&
> -                   tun->flow_count < MAX_TAP_FLOWS)
> -                       tun_flow_create(tun, head, rxhash, queue_index);
> -
> -               if (!timer_pending(&tun->flow_gc_timer))
> -                       mod_timer(&tun->flow_gc_timer,
> -                                 round_jiffies_up(jiffies + delay));
> -               spin_unlock_bh(&tun->lock);
> -       }
> -
> -unlock:
> -       rcu_read_unlock();
> -}
> -
>  /* We try to identify a flow through its rxhash first. The reason that
>   * we do not check rxq no. is becuase some cards(e.g 82599), chooses
>   * the rxq based on the txq where the last packet of the flow comes. As
> @@ -351,7 +190,6 @@ unlock:
>  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>  {
>         struct tun_struct *tun = netdev_priv(dev);
> -       struct tun_flow_entry *e;
>         u32 txq = 0;
>         u32 numqueues = 0;
>
> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>
>         txq = skb_get_rxhash(skb);
>         if (txq) {
> -               e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
> -               if (e)
> -                       txq = e->queue_index;
> -               else
> +               txq = skb_get_queue_mapping(skb);
> +               if (!txq) {
>                         /* use multiply and shift instead of expensive divide */
>                         txq = ((u64)txq * numqueues) >> 32;
> +               }
>         } else if (likely(skb_rx_queue_recorded(skb))) {
>                 txq = skb_get_rx_queue(skb);
>                 while (unlikely(txq >= numqueues))
> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
>                         tun_disable_queue(tun, tfile);
>
>                 synchronize_net();
> -               tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>                 /* Drop read queue */
>                 tun_queue_purge(tfile);
>                 tun_set_real_num_queues(tun);
> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
>  #endif
>  };
>
> -static void tun_flow_init(struct tun_struct *tun)
> -{
> -       int i;
> -
> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
> -               INIT_HLIST_HEAD(&tun->flows[i]);
> -
> -       tun->ageing_time = TUN_FLOW_EXPIRE;
> -       setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
> -       mod_timer(&tun->flow_gc_timer,
> -                 round_jiffies_up(jiffies + tun->ageing_time));
> -}
> -
> -static void tun_flow_uninit(struct tun_struct *tun)
> -{
> -       del_timer_sync(&tun->flow_gc_timer);
> -       tun_flow_flush(tun);
> -}
> -
>  /* Initialize net device. */
>  static void tun_net_init(struct net_device *dev)
>  {
> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>         int copylen;
>         bool zerocopy = false;
>         int err;
> -       u32 rxhash;
>
>         if (!(tun->flags & TUN_NO_PI)) {
>                 if (len < sizeof(pi))
> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>         skb_reset_network_header(skb);
>         skb_probe_transport_header(skb, 0);
>
> -       rxhash = skb_get_rxhash(skb);
> +       skb_set_queue_mapping(skb, tfile->queue_index);
>         netif_rx_ni(skb);
>
>         tun->dev->stats.rx_packets++;
>         tun->dev->stats.rx_bytes += len;
>
> -       tun_flow_update(tun, rxhash, tfile);
>         return total_len;
>  }
>
> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
>         struct tun_struct *tun = netdev_priv(dev);
>
>         BUG_ON(!(list_empty(&tun->disabled)));
> -       tun_flow_uninit(tun);
>         security_tun_dev_free_security(tun->security);
>         free_netdev(dev);
>  }
> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>                         goto err_free_dev;
>
>                 tun_net_init(dev);
> -               tun_flow_init(tun);
>
>                 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>                                    TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>                 INIT_LIST_HEAD(&tun->disabled);
>                 err = tun_attach(tun, file, false);
>                 if (err < 0)
> -                       goto err_free_flow;
> +                       goto err_free_security;
>
>                 err = register_netdevice(tun->dev);
>                 if (err < 0)
> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>
>  err_detach:
>         tun_detach_all(dev);
> -err_free_flow:
> -       tun_flow_uninit(tun);
> +err_free_security:
>         security_tun_dev_free_security(tun->security);
>  err_free_dev:
>         free_netdev(dev);
> --
> 1.7.6.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-18  4:06 ` Tom Herbert
@ 2013-12-18  4:37   ` Zhi Yong Wu
  2013-12-18  4:58     ` Tom Herbert
  0 siblings, 1 reply; 12+ messages in thread
From: Zhi Yong Wu @ 2013-12-18  4:37 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Netdev List, LKML, David Miller, Michael S. Tsirkin,
	Jason Wang, Zhi Yong Wu

Hi, Tom,

On Wed, Dec 18, 2013 at 12:06 PM, Tom Herbert <therbert@google.com> wrote:
> On Mon, Dec 16, 2013 at 11:26 PM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>> The flow cache is an extremely broken concept, and it usually brings up
>> growth issues and DoS attacks, so this patch is trying to remove it from
>> the tuntap driver, and insteadly use a simpler way for its flow control.
>>
> Yes , in it's current state it's broken. But maybe we can try to fix
> it instead of arbitrarily removing it. Please see my patches on
> plumbing RFS into tuntap which may start to make it useful.
Do you mean your patch [5/5] "tun: Added support for RFS on tun flows"?
Sorry, could you explain in more detail?

>
> Tom
>
>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> ---
>>  drivers/net/tun.c |  208 +++--------------------------------------------------
>>  1 files changed, 10 insertions(+), 198 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 7c8343a..7c27fdc 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -32,12 +32,15 @@
>>   *
>>   *  Daniel Podlejski <underley@underley.eu.org>
>>   *    Modifications for 2.3.99-pre5 kernel.
>> + *
>> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> + *    Remove the flow cache.
>>   */
>>
>>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>>
>>  #define DRV_NAME       "tun"
>> -#define DRV_VERSION    "1.6"
>> +#define DRV_VERSION    "1.7"
>>  #define DRV_DESCRIPTION        "Universal TUN/TAP device driver"
>>  #define DRV_COPYRIGHT  "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
>>
>> @@ -146,18 +149,6 @@ struct tun_file {
>>         struct tun_struct *detached;
>>  };
>>
>> -struct tun_flow_entry {
>> -       struct hlist_node hash_link;
>> -       struct rcu_head rcu;
>> -       struct tun_struct *tun;
>> -
>> -       u32 rxhash;
>> -       int queue_index;
>> -       unsigned long updated;
>> -};
>> -
>> -#define TUN_NUM_FLOW_ENTRIES 1024
>> -
>>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>>   * device, socket filter, sndbuf and vnet header size were restore when the
>>   * file were attached to a persist device.
>> @@ -184,163 +175,11 @@ struct tun_struct {
>>         int debug;
>>  #endif
>>         spinlock_t lock;
>> -       struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>> -       struct timer_list flow_gc_timer;
>> -       unsigned long ageing_time;
>>         unsigned int numdisabled;
>>         struct list_head disabled;
>>         void *security;
>> -       u32 flow_count;
>>  };
>>
>> -static inline u32 tun_hashfn(u32 rxhash)
>> -{
>> -       return rxhash & 0x3ff;
>> -}
>> -
>> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
>> -{
>> -       struct tun_flow_entry *e;
>> -
>> -       hlist_for_each_entry_rcu(e, head, hash_link) {
>> -               if (e->rxhash == rxhash)
>> -                       return e;
>> -       }
>> -       return NULL;
>> -}
>> -
>> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
>> -                                             struct hlist_head *head,
>> -                                             u32 rxhash, u16 queue_index)
>> -{
>> -       struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
>> -
>> -       if (e) {
>> -               tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
>> -                         rxhash, queue_index);
>> -               e->updated = jiffies;
>> -               e->rxhash = rxhash;
>> -               e->queue_index = queue_index;
>> -               e->tun = tun;
>> -               hlist_add_head_rcu(&e->hash_link, head);
>> -               ++tun->flow_count;
>> -       }
>> -       return e;
>> -}
>> -
>> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
>> -{
>> -       tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
>> -                 e->rxhash, e->queue_index);
>> -       hlist_del_rcu(&e->hash_link);
>> -       kfree_rcu(e, rcu);
>> -       --tun->flow_count;
>> -}
>> -
>> -static void tun_flow_flush(struct tun_struct *tun)
>> -{
>> -       int i;
>> -
>> -       spin_lock_bh(&tun->lock);
>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>> -               struct tun_flow_entry *e;
>> -               struct hlist_node *n;
>> -
>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
>> -                       tun_flow_delete(tun, e);
>> -       }
>> -       spin_unlock_bh(&tun->lock);
>> -}
>> -
>> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
>> -{
>> -       int i;
>> -
>> -       spin_lock_bh(&tun->lock);
>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>> -               struct tun_flow_entry *e;
>> -               struct hlist_node *n;
>> -
>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>> -                       if (e->queue_index == queue_index)
>> -                               tun_flow_delete(tun, e);
>> -               }
>> -       }
>> -       spin_unlock_bh(&tun->lock);
>> -}
>> -
>> -static void tun_flow_cleanup(unsigned long data)
>> -{
>> -       struct tun_struct *tun = (struct tun_struct *)data;
>> -       unsigned long delay = tun->ageing_time;
>> -       unsigned long next_timer = jiffies + delay;
>> -       unsigned long count = 0;
>> -       int i;
>> -
>> -       tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
>> -
>> -       spin_lock_bh(&tun->lock);
>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>> -               struct tun_flow_entry *e;
>> -               struct hlist_node *n;
>> -
>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>> -                       unsigned long this_timer;
>> -                       count++;
>> -                       this_timer = e->updated + delay;
>> -                       if (time_before_eq(this_timer, jiffies))
>> -                               tun_flow_delete(tun, e);
>> -                       else if (time_before(this_timer, next_timer))
>> -                               next_timer = this_timer;
>> -               }
>> -       }
>> -
>> -       if (count)
>> -               mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
>> -       spin_unlock_bh(&tun->lock);
>> -}
>> -
>> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
>> -                           struct tun_file *tfile)
>> -{
>> -       struct hlist_head *head;
>> -       struct tun_flow_entry *e;
>> -       unsigned long delay = tun->ageing_time;
>> -       u16 queue_index = tfile->queue_index;
>> -
>> -       if (!rxhash)
>> -               return;
>> -       else
>> -               head = &tun->flows[tun_hashfn(rxhash)];
>> -
>> -       rcu_read_lock();
>> -
>> -       /* We may get a very small possibility of OOO during switching, not
>> -        * worth to optimize.*/
>> -       if (tun->numqueues == 1 || tfile->detached)
>> -               goto unlock;
>> -
>> -       e = tun_flow_find(head, rxhash);
>> -       if (likely(e)) {
>> -               /* TODO: keep queueing to old queue until it's empty? */
>> -               e->queue_index = queue_index;
>> -               e->updated = jiffies;
>> -       } else {
>> -               spin_lock_bh(&tun->lock);
>> -               if (!tun_flow_find(head, rxhash) &&
>> -                   tun->flow_count < MAX_TAP_FLOWS)
>> -                       tun_flow_create(tun, head, rxhash, queue_index);
>> -
>> -               if (!timer_pending(&tun->flow_gc_timer))
>> -                       mod_timer(&tun->flow_gc_timer,
>> -                                 round_jiffies_up(jiffies + delay));
>> -               spin_unlock_bh(&tun->lock);
>> -       }
>> -
>> -unlock:
>> -       rcu_read_unlock();
>> -}
>> -
>>  /* We try to identify a flow through its rxhash first. The reason that
>>   * we do not check rxq no. is becuase some cards(e.g 82599), chooses
>>   * the rxq based on the txq where the last packet of the flow comes. As
>> @@ -351,7 +190,6 @@ unlock:
>>  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>  {
>>         struct tun_struct *tun = netdev_priv(dev);
>> -       struct tun_flow_entry *e;
>>         u32 txq = 0;
>>         u32 numqueues = 0;
>>
>> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>
>>         txq = skb_get_rxhash(skb);
>>         if (txq) {
>> -               e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>> -               if (e)
>> -                       txq = e->queue_index;
>> -               else
>> +               txq = skb_get_queue_mapping(skb);
>> +               if (!txq) {
>>                         /* use multiply and shift instead of expensive divide */
>>                         txq = ((u64)txq * numqueues) >> 32;
>> +               }
>>         } else if (likely(skb_rx_queue_recorded(skb))) {
>>                 txq = skb_get_rx_queue(skb);
>>                 while (unlikely(txq >= numqueues))
>> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
>>                         tun_disable_queue(tun, tfile);
>>
>>                 synchronize_net();
>> -               tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>>                 /* Drop read queue */
>>                 tun_queue_purge(tfile);
>>                 tun_set_real_num_queues(tun);
>> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
>>  #endif
>>  };
>>
>> -static void tun_flow_init(struct tun_struct *tun)
>> -{
>> -       int i;
>> -
>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
>> -               INIT_HLIST_HEAD(&tun->flows[i]);
>> -
>> -       tun->ageing_time = TUN_FLOW_EXPIRE;
>> -       setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
>> -       mod_timer(&tun->flow_gc_timer,
>> -                 round_jiffies_up(jiffies + tun->ageing_time));
>> -}
>> -
>> -static void tun_flow_uninit(struct tun_struct *tun)
>> -{
>> -       del_timer_sync(&tun->flow_gc_timer);
>> -       tun_flow_flush(tun);
>> -}
>> -
>>  /* Initialize net device. */
>>  static void tun_net_init(struct net_device *dev)
>>  {
>> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>         int copylen;
>>         bool zerocopy = false;
>>         int err;
>> -       u32 rxhash;
>>
>>         if (!(tun->flags & TUN_NO_PI)) {
>>                 if (len < sizeof(pi))
>> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>         skb_reset_network_header(skb);
>>         skb_probe_transport_header(skb, 0);
>>
>> -       rxhash = skb_get_rxhash(skb);
>> +       skb_set_queue_mapping(skb, tfile->queue_index);
>>         netif_rx_ni(skb);
>>
>>         tun->dev->stats.rx_packets++;
>>         tun->dev->stats.rx_bytes += len;
>>
>> -       tun_flow_update(tun, rxhash, tfile);
>>         return total_len;
>>  }
>>
>> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
>>         struct tun_struct *tun = netdev_priv(dev);
>>
>>         BUG_ON(!(list_empty(&tun->disabled)));
>> -       tun_flow_uninit(tun);
>>         security_tun_dev_free_security(tun->security);
>>         free_netdev(dev);
>>  }
>> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>                         goto err_free_dev;
>>
>>                 tun_net_init(dev);
>> -               tun_flow_init(tun);
>>
>>                 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>>                                    TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
>> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>                 INIT_LIST_HEAD(&tun->disabled);
>>                 err = tun_attach(tun, file, false);
>>                 if (err < 0)
>> -                       goto err_free_flow;
>> +                       goto err_free_security;
>>
>>                 err = register_netdevice(tun->dev);
>>                 if (err < 0)
>> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>
>>  err_detach:
>>         tun_detach_all(dev);
>> -err_free_flow:
>> -       tun_flow_uninit(tun);
>> +err_free_security:
>>         security_tun_dev_free_security(tun->security);
>>  err_free_dev:
>>         free_netdev(dev);
>> --
>> 1.7.6.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-18  4:37   ` Zhi Yong Wu
@ 2013-12-18  4:58     ` Tom Herbert
  2013-12-18  5:23       ` Zhi Yong Wu
  0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2013-12-18  4:58 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Linux Netdev List, LKML, David Miller, Michael S. Tsirkin,
	Jason Wang, Zhi Yong Wu

>> Yes , in it's current state it's broken. But maybe we can try to fix
>> it instead of arbitrarily removing it. Please see my patches on
>> plumbing RFS into tuntap which may start to make it useful.
> Do you mean you patch [5/5] tun: Added support for RFS on tun flows?
> Sorry, can you say with more details?

Correct. It was RFC since I didn't have a good way to test, if you do
please try it and see if there's any effect. We should also be able to
do something similar for KVM guests, either doing the flow lookup on
each packet from the guest, or use aRFS interface from the guest
driver for end to end RFS (more exciting prospect). We are finding
that guest to driver accelerations like this (and tso, lro) are quite
important in getting virtual networking performance up.

>
>>
>> Tom
>>
>>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>> ---
>>>  drivers/net/tun.c |  208 +++--------------------------------------------------
>>>  1 files changed, 10 insertions(+), 198 deletions(-)
>>>
>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>> index 7c8343a..7c27fdc 100644
>>> --- a/drivers/net/tun.c
>>> +++ b/drivers/net/tun.c
>>> @@ -32,12 +32,15 @@
>>>   *
>>>   *  Daniel Podlejski <underley@underley.eu.org>
>>>   *    Modifications for 2.3.99-pre5 kernel.
>>> + *
>>> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>> + *    Remove the flow cache.
>>>   */
>>>
>>>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>>>
>>>  #define DRV_NAME       "tun"
>>> -#define DRV_VERSION    "1.6"
>>> +#define DRV_VERSION    "1.7"
>>>  #define DRV_DESCRIPTION        "Universal TUN/TAP device driver"
>>>  #define DRV_COPYRIGHT  "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
>>>
>>> @@ -146,18 +149,6 @@ struct tun_file {
>>>         struct tun_struct *detached;
>>>  };
>>>
>>> -struct tun_flow_entry {
>>> -       struct hlist_node hash_link;
>>> -       struct rcu_head rcu;
>>> -       struct tun_struct *tun;
>>> -
>>> -       u32 rxhash;
>>> -       int queue_index;
>>> -       unsigned long updated;
>>> -};
>>> -
>>> -#define TUN_NUM_FLOW_ENTRIES 1024
>>> -
>>>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>>>   * device, socket filter, sndbuf and vnet header size were restore when the
>>>   * file were attached to a persist device.
>>> @@ -184,163 +175,11 @@ struct tun_struct {
>>>         int debug;
>>>  #endif
>>>         spinlock_t lock;
>>> -       struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>>> -       struct timer_list flow_gc_timer;
>>> -       unsigned long ageing_time;
>>>         unsigned int numdisabled;
>>>         struct list_head disabled;
>>>         void *security;
>>> -       u32 flow_count;
>>>  };
>>>
>>> -static inline u32 tun_hashfn(u32 rxhash)
>>> -{
>>> -       return rxhash & 0x3ff;
>>> -}
>>> -
>>> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
>>> -{
>>> -       struct tun_flow_entry *e;
>>> -
>>> -       hlist_for_each_entry_rcu(e, head, hash_link) {
>>> -               if (e->rxhash == rxhash)
>>> -                       return e;
>>> -       }
>>> -       return NULL;
>>> -}
>>> -
>>> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
>>> -                                             struct hlist_head *head,
>>> -                                             u32 rxhash, u16 queue_index)
>>> -{
>>> -       struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
>>> -
>>> -       if (e) {
>>> -               tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
>>> -                         rxhash, queue_index);
>>> -               e->updated = jiffies;
>>> -               e->rxhash = rxhash;
>>> -               e->queue_index = queue_index;
>>> -               e->tun = tun;
>>> -               hlist_add_head_rcu(&e->hash_link, head);
>>> -               ++tun->flow_count;
>>> -       }
>>> -       return e;
>>> -}
>>> -
>>> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
>>> -{
>>> -       tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
>>> -                 e->rxhash, e->queue_index);
>>> -       hlist_del_rcu(&e->hash_link);
>>> -       kfree_rcu(e, rcu);
>>> -       --tun->flow_count;
>>> -}
>>> -
>>> -static void tun_flow_flush(struct tun_struct *tun)
>>> -{
>>> -       int i;
>>> -
>>> -       spin_lock_bh(&tun->lock);
>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>>> -               struct tun_flow_entry *e;
>>> -               struct hlist_node *n;
>>> -
>>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
>>> -                       tun_flow_delete(tun, e);
>>> -       }
>>> -       spin_unlock_bh(&tun->lock);
>>> -}
>>> -
>>> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
>>> -{
>>> -       int i;
>>> -
>>> -       spin_lock_bh(&tun->lock);
>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>>> -               struct tun_flow_entry *e;
>>> -               struct hlist_node *n;
>>> -
>>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>>> -                       if (e->queue_index == queue_index)
>>> -                               tun_flow_delete(tun, e);
>>> -               }
>>> -       }
>>> -       spin_unlock_bh(&tun->lock);
>>> -}
>>> -
>>> -static void tun_flow_cleanup(unsigned long data)
>>> -{
>>> -       struct tun_struct *tun = (struct tun_struct *)data;
>>> -       unsigned long delay = tun->ageing_time;
>>> -       unsigned long next_timer = jiffies + delay;
>>> -       unsigned long count = 0;
>>> -       int i;
>>> -
>>> -       tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
>>> -
>>> -       spin_lock_bh(&tun->lock);
>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>>> -               struct tun_flow_entry *e;
>>> -               struct hlist_node *n;
>>> -
>>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>>> -                       unsigned long this_timer;
>>> -                       count++;
>>> -                       this_timer = e->updated + delay;
>>> -                       if (time_before_eq(this_timer, jiffies))
>>> -                               tun_flow_delete(tun, e);
>>> -                       else if (time_before(this_timer, next_timer))
>>> -                               next_timer = this_timer;
>>> -               }
>>> -       }
>>> -
>>> -       if (count)
>>> -               mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
>>> -       spin_unlock_bh(&tun->lock);
>>> -}
>>> -
>>> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
>>> -                           struct tun_file *tfile)
>>> -{
>>> -       struct hlist_head *head;
>>> -       struct tun_flow_entry *e;
>>> -       unsigned long delay = tun->ageing_time;
>>> -       u16 queue_index = tfile->queue_index;
>>> -
>>> -       if (!rxhash)
>>> -               return;
>>> -       else
>>> -               head = &tun->flows[tun_hashfn(rxhash)];
>>> -
>>> -       rcu_read_lock();
>>> -
>>> -       /* We may get a very small possibility of OOO during switching, not
>>> -        * worth to optimize.*/
>>> -       if (tun->numqueues == 1 || tfile->detached)
>>> -               goto unlock;
>>> -
>>> -       e = tun_flow_find(head, rxhash);
>>> -       if (likely(e)) {
>>> -               /* TODO: keep queueing to old queue until it's empty? */
>>> -               e->queue_index = queue_index;
>>> -               e->updated = jiffies;
>>> -       } else {
>>> -               spin_lock_bh(&tun->lock);
>>> -               if (!tun_flow_find(head, rxhash) &&
>>> -                   tun->flow_count < MAX_TAP_FLOWS)
>>> -                       tun_flow_create(tun, head, rxhash, queue_index);
>>> -
>>> -               if (!timer_pending(&tun->flow_gc_timer))
>>> -                       mod_timer(&tun->flow_gc_timer,
>>> -                                 round_jiffies_up(jiffies + delay));
>>> -               spin_unlock_bh(&tun->lock);
>>> -       }
>>> -
>>> -unlock:
>>> -       rcu_read_unlock();
>>> -}
>>> -
>>>  /* We try to identify a flow through its rxhash first. The reason that
>>>   * we do not check rxq no. is becuase some cards(e.g 82599), chooses
>>>   * the rxq based on the txq where the last packet of the flow comes. As
>>> @@ -351,7 +190,6 @@ unlock:
>>>  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>>  {
>>>         struct tun_struct *tun = netdev_priv(dev);
>>> -       struct tun_flow_entry *e;
>>>         u32 txq = 0;
>>>         u32 numqueues = 0;
>>>
>>> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>>
>>>         txq = skb_get_rxhash(skb);
>>>         if (txq) {
>>> -               e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>>> -               if (e)
>>> -                       txq = e->queue_index;
>>> -               else
>>> +               txq = skb_get_queue_mapping(skb);
>>> +               if (!txq) {
>>>                         /* use multiply and shift instead of expensive divide */
>>>                         txq = ((u64)txq * numqueues) >> 32;
>>> +               }
>>>         } else if (likely(skb_rx_queue_recorded(skb))) {
>>>                 txq = skb_get_rx_queue(skb);
>>>                 while (unlikely(txq >= numqueues))
>>> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
>>>                         tun_disable_queue(tun, tfile);
>>>
>>>                 synchronize_net();
>>> -               tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>>>                 /* Drop read queue */
>>>                 tun_queue_purge(tfile);
>>>                 tun_set_real_num_queues(tun);
>>> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
>>>  #endif
>>>  };
>>>
>>> -static void tun_flow_init(struct tun_struct *tun)
>>> -{
>>> -       int i;
>>> -
>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
>>> -               INIT_HLIST_HEAD(&tun->flows[i]);
>>> -
>>> -       tun->ageing_time = TUN_FLOW_EXPIRE;
>>> -       setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
>>> -       mod_timer(&tun->flow_gc_timer,
>>> -                 round_jiffies_up(jiffies + tun->ageing_time));
>>> -}
>>> -
>>> -static void tun_flow_uninit(struct tun_struct *tun)
>>> -{
>>> -       del_timer_sync(&tun->flow_gc_timer);
>>> -       tun_flow_flush(tun);
>>> -}
>>> -
>>>  /* Initialize net device. */
>>>  static void tun_net_init(struct net_device *dev)
>>>  {
>>> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>>         int copylen;
>>>         bool zerocopy = false;
>>>         int err;
>>> -       u32 rxhash;
>>>
>>>         if (!(tun->flags & TUN_NO_PI)) {
>>>                 if (len < sizeof(pi))
>>> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>>         skb_reset_network_header(skb);
>>>         skb_probe_transport_header(skb, 0);
>>>
>>> -       rxhash = skb_get_rxhash(skb);
>>> +       skb_set_queue_mapping(skb, tfile->queue_index);
>>>         netif_rx_ni(skb);
>>>
>>>         tun->dev->stats.rx_packets++;
>>>         tun->dev->stats.rx_bytes += len;
>>>
>>> -       tun_flow_update(tun, rxhash, tfile);
>>>         return total_len;
>>>  }
>>>
>>> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
>>>         struct tun_struct *tun = netdev_priv(dev);
>>>
>>>         BUG_ON(!(list_empty(&tun->disabled)));
>>> -       tun_flow_uninit(tun);
>>>         security_tun_dev_free_security(tun->security);
>>>         free_netdev(dev);
>>>  }
>>> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>>                         goto err_free_dev;
>>>
>>>                 tun_net_init(dev);
>>> -               tun_flow_init(tun);
>>>
>>>                 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>>>                                    TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
>>> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>>                 INIT_LIST_HEAD(&tun->disabled);
>>>                 err = tun_attach(tun, file, false);
>>>                 if (err < 0)
>>> -                       goto err_free_flow;
>>> +                       goto err_free_security;
>>>
>>>                 err = register_netdevice(tun->dev);
>>>                 if (err < 0)
>>> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>>
>>>  err_detach:
>>>         tun_detach_all(dev);
>>> -err_free_flow:
>>> -       tun_flow_uninit(tun);
>>> +err_free_security:
>>>         security_tun_dev_free_security(tun->security);
>>>  err_free_dev:
>>>         free_netdev(dev);
>>> --
>>> 1.7.6.5
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
>
> --
> Regards,
>
> Zhi Yong Wu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-18  4:58     ` Tom Herbert
@ 2013-12-18  5:23       ` Zhi Yong Wu
  0 siblings, 0 replies; 12+ messages in thread
From: Zhi Yong Wu @ 2013-12-18  5:23 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Netdev List, LKML, David Miller, Michael S. Tsirkin,
	Jason Wang, Zhi Yong Wu

On Wed, Dec 18, 2013 at 12:58 PM, Tom Herbert <therbert@google.com> wrote:
>>> Yes , in it's current state it's broken. But maybe we can try to fix
>>> it instead of arbitrarily removing it. Please see my patches on
>>> plumbing RFS into tuntap which may start to make it useful.
>> Do you mean you patch [5/5] tun: Added support for RFS on tun flows?
>> Sorry, can you say with more details?
>
> Correct. It was RFC since I didn't have a good way to test, if you do
> please try it and see if there's any effect. We should also be able to
Interesting, I will try to dig into it. Sorry, I don't understand why you
can't test. Does it require some special hardware support, or other
facilities?
> do something similar for KVM guests, either doing the flow lookup on
> each packet from the guest, or use aRFS interface from the guest
> driver for end to end RFS (more exciting prospect). We are finding
Which two ends do you mean?
> that guest to driver accelerations like this (and tso, lro) are quite
Sorry, I got a bit confused — does "the driver" here mean the virtio_net driver or the tuntap driver?
> important in getting virtual networking performance up.
>
>>
>>>
>>> Tom
>>>
>>>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>> ---
>>>>  drivers/net/tun.c |  208 +++--------------------------------------------------
>>>>  1 files changed, 10 insertions(+), 198 deletions(-)
>>>>
>>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>>> index 7c8343a..7c27fdc 100644
>>>> --- a/drivers/net/tun.c
>>>> +++ b/drivers/net/tun.c
>>>> @@ -32,12 +32,15 @@
>>>>   *
>>>>   *  Daniel Podlejski <underley@underley.eu.org>
>>>>   *    Modifications for 2.3.99-pre5 kernel.
>>>> + *
>>>> + *  Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>> + *    Remove the flow cache.
>>>>   */
>>>>
>>>>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>>>>
>>>>  #define DRV_NAME       "tun"
>>>> -#define DRV_VERSION    "1.6"
>>>> +#define DRV_VERSION    "1.7"
>>>>  #define DRV_DESCRIPTION        "Universal TUN/TAP device driver"
>>>>  #define DRV_COPYRIGHT  "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
>>>>
>>>> @@ -146,18 +149,6 @@ struct tun_file {
>>>>         struct tun_struct *detached;
>>>>  };
>>>>
>>>> -struct tun_flow_entry {
>>>> -       struct hlist_node hash_link;
>>>> -       struct rcu_head rcu;
>>>> -       struct tun_struct *tun;
>>>> -
>>>> -       u32 rxhash;
>>>> -       int queue_index;
>>>> -       unsigned long updated;
>>>> -};
>>>> -
>>>> -#define TUN_NUM_FLOW_ENTRIES 1024
>>>> -
>>>>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>>>>   * device, socket filter, sndbuf and vnet header size were restore when the
>>>>   * file were attached to a persist device.
>>>> @@ -184,163 +175,11 @@ struct tun_struct {
>>>>         int debug;
>>>>  #endif
>>>>         spinlock_t lock;
>>>> -       struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>>>> -       struct timer_list flow_gc_timer;
>>>> -       unsigned long ageing_time;
>>>>         unsigned int numdisabled;
>>>>         struct list_head disabled;
>>>>         void *security;
>>>> -       u32 flow_count;
>>>>  };
>>>>
>>>> -static inline u32 tun_hashfn(u32 rxhash)
>>>> -{
>>>> -       return rxhash & 0x3ff;
>>>> -}
>>>> -
>>>> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
>>>> -{
>>>> -       struct tun_flow_entry *e;
>>>> -
>>>> -       hlist_for_each_entry_rcu(e, head, hash_link) {
>>>> -               if (e->rxhash == rxhash)
>>>> -                       return e;
>>>> -       }
>>>> -       return NULL;
>>>> -}
>>>> -
>>>> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
>>>> -                                             struct hlist_head *head,
>>>> -                                             u32 rxhash, u16 queue_index)
>>>> -{
>>>> -       struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
>>>> -
>>>> -       if (e) {
>>>> -               tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
>>>> -                         rxhash, queue_index);
>>>> -               e->updated = jiffies;
>>>> -               e->rxhash = rxhash;
>>>> -               e->queue_index = queue_index;
>>>> -               e->tun = tun;
>>>> -               hlist_add_head_rcu(&e->hash_link, head);
>>>> -               ++tun->flow_count;
>>>> -       }
>>>> -       return e;
>>>> -}
>>>> -
>>>> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
>>>> -{
>>>> -       tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
>>>> -                 e->rxhash, e->queue_index);
>>>> -       hlist_del_rcu(&e->hash_link);
>>>> -       kfree_rcu(e, rcu);
>>>> -       --tun->flow_count;
>>>> -}
>>>> -
>>>> -static void tun_flow_flush(struct tun_struct *tun)
>>>> -{
>>>> -       int i;
>>>> -
>>>> -       spin_lock_bh(&tun->lock);
>>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>>>> -               struct tun_flow_entry *e;
>>>> -               struct hlist_node *n;
>>>> -
>>>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
>>>> -                       tun_flow_delete(tun, e);
>>>> -       }
>>>> -       spin_unlock_bh(&tun->lock);
>>>> -}
>>>> -
>>>> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
>>>> -{
>>>> -       int i;
>>>> -
>>>> -       spin_lock_bh(&tun->lock);
>>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>>>> -               struct tun_flow_entry *e;
>>>> -               struct hlist_node *n;
>>>> -
>>>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>>>> -                       if (e->queue_index == queue_index)
>>>> -                               tun_flow_delete(tun, e);
>>>> -               }
>>>> -       }
>>>> -       spin_unlock_bh(&tun->lock);
>>>> -}
>>>> -
>>>> -static void tun_flow_cleanup(unsigned long data)
>>>> -{
>>>> -       struct tun_struct *tun = (struct tun_struct *)data;
>>>> -       unsigned long delay = tun->ageing_time;
>>>> -       unsigned long next_timer = jiffies + delay;
>>>> -       unsigned long count = 0;
>>>> -       int i;
>>>> -
>>>> -       tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
>>>> -
>>>> -       spin_lock_bh(&tun->lock);
>>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
>>>> -               struct tun_flow_entry *e;
>>>> -               struct hlist_node *n;
>>>> -
>>>> -               hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
>>>> -                       unsigned long this_timer;
>>>> -                       count++;
>>>> -                       this_timer = e->updated + delay;
>>>> -                       if (time_before_eq(this_timer, jiffies))
>>>> -                               tun_flow_delete(tun, e);
>>>> -                       else if (time_before(this_timer, next_timer))
>>>> -                               next_timer = this_timer;
>>>> -               }
>>>> -       }
>>>> -
>>>> -       if (count)
>>>> -               mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
>>>> -       spin_unlock_bh(&tun->lock);
>>>> -}
>>>> -
>>>> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
>>>> -                           struct tun_file *tfile)
>>>> -{
>>>> -       struct hlist_head *head;
>>>> -       struct tun_flow_entry *e;
>>>> -       unsigned long delay = tun->ageing_time;
>>>> -       u16 queue_index = tfile->queue_index;
>>>> -
>>>> -       if (!rxhash)
>>>> -               return;
>>>> -       else
>>>> -               head = &tun->flows[tun_hashfn(rxhash)];
>>>> -
>>>> -       rcu_read_lock();
>>>> -
>>>> -       /* We may get a very small possibility of OOO during switching, not
>>>> -        * worth to optimize.*/
>>>> -       if (tun->numqueues == 1 || tfile->detached)
>>>> -               goto unlock;
>>>> -
>>>> -       e = tun_flow_find(head, rxhash);
>>>> -       if (likely(e)) {
>>>> -               /* TODO: keep queueing to old queue until it's empty? */
>>>> -               e->queue_index = queue_index;
>>>> -               e->updated = jiffies;
>>>> -       } else {
>>>> -               spin_lock_bh(&tun->lock);
>>>> -               if (!tun_flow_find(head, rxhash) &&
>>>> -                   tun->flow_count < MAX_TAP_FLOWS)
>>>> -                       tun_flow_create(tun, head, rxhash, queue_index);
>>>> -
>>>> -               if (!timer_pending(&tun->flow_gc_timer))
>>>> -                       mod_timer(&tun->flow_gc_timer,
>>>> -                                 round_jiffies_up(jiffies + delay));
>>>> -               spin_unlock_bh(&tun->lock);
>>>> -       }
>>>> -
>>>> -unlock:
>>>> -       rcu_read_unlock();
>>>> -}
>>>> -
>>>>  /* We try to identify a flow through its rxhash first. The reason that
>>>>   * we do not check rxq no. is becuase some cards(e.g 82599), chooses
>>>>   * the rxq based on the txq where the last packet of the flow comes. As
>>>> @@ -351,7 +190,6 @@ unlock:
>>>>  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>>>  {
>>>>         struct tun_struct *tun = netdev_priv(dev);
>>>> -       struct tun_flow_entry *e;
>>>>         u32 txq = 0;
>>>>         u32 numqueues = 0;
>>>>
>>>> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>>>>
>>>>         txq = skb_get_rxhash(skb);
>>>>         if (txq) {
>>>> -               e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>>>> -               if (e)
>>>> -                       txq = e->queue_index;
>>>> -               else
>>>> +               txq = skb_get_queue_mapping(skb);
>>>> +               if (!txq) {
>>>>                         /* use multiply and shift instead of expensive divide */
>>>>                         txq = ((u64)txq * numqueues) >> 32;
>>>> +               }
>>>>         } else if (likely(skb_rx_queue_recorded(skb))) {
>>>>                 txq = skb_get_rx_queue(skb);
>>>>                 while (unlikely(txq >= numqueues))
>>>> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
>>>>                         tun_disable_queue(tun, tfile);
>>>>
>>>>                 synchronize_net();
>>>> -               tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>>>>                 /* Drop read queue */
>>>>                 tun_queue_purge(tfile);
>>>>                 tun_set_real_num_queues(tun);
>>>> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = {
>>>>  #endif
>>>>  };
>>>>
>>>> -static void tun_flow_init(struct tun_struct *tun)
>>>> -{
>>>> -       int i;
>>>> -
>>>> -       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
>>>> -               INIT_HLIST_HEAD(&tun->flows[i]);
>>>> -
>>>> -       tun->ageing_time = TUN_FLOW_EXPIRE;
>>>> -       setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
>>>> -       mod_timer(&tun->flow_gc_timer,
>>>> -                 round_jiffies_up(jiffies + tun->ageing_time));
>>>> -}
>>>> -
>>>> -static void tun_flow_uninit(struct tun_struct *tun)
>>>> -{
>>>> -       del_timer_sync(&tun->flow_gc_timer);
>>>> -       tun_flow_flush(tun);
>>>> -}
>>>> -
>>>>  /* Initialize net device. */
>>>>  static void tun_net_init(struct net_device *dev)
>>>>  {
>>>> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>>>         int copylen;
>>>>         bool zerocopy = false;
>>>>         int err;
>>>> -       u32 rxhash;
>>>>
>>>>         if (!(tun->flags & TUN_NO_PI)) {
>>>>                 if (len < sizeof(pi))
>>>> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>>>         skb_reset_network_header(skb);
>>>>         skb_probe_transport_header(skb, 0);
>>>>
>>>> -       rxhash = skb_get_rxhash(skb);
>>>> +       skb_set_queue_mapping(skb, tfile->queue_index);
>>>>         netif_rx_ni(skb);
>>>>
>>>>         tun->dev->stats.rx_packets++;
>>>>         tun->dev->stats.rx_bytes += len;
>>>>
>>>> -       tun_flow_update(tun, rxhash, tfile);
>>>>         return total_len;
>>>>  }
>>>>
>>>> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev)
>>>>         struct tun_struct *tun = netdev_priv(dev);
>>>>
>>>>         BUG_ON(!(list_empty(&tun->disabled)));
>>>> -       tun_flow_uninit(tun);
>>>>         security_tun_dev_free_security(tun->security);
>>>>         free_netdev(dev);
>>>>  }
>>>> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>>>                         goto err_free_dev;
>>>>
>>>>                 tun_net_init(dev);
>>>> -               tun_flow_init(tun);
>>>>
>>>>                 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>>>>                                    TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
>>>> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>>>                 INIT_LIST_HEAD(&tun->disabled);
>>>>                 err = tun_attach(tun, file, false);
>>>>                 if (err < 0)
>>>> -                       goto err_free_flow;
>>>> +                       goto err_free_security;
>>>>
>>>>                 err = register_netdevice(tun->dev);
>>>>                 if (err < 0)
>>>> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>>>>
>>>>  err_detach:
>>>>         tun_detach_all(dev);
>>>> -err_free_flow:
>>>> -       tun_flow_uninit(tun);
>>>> +err_free_security:
>>>>         security_tun_dev_free_security(tun->security);
>>>>  err_free_dev:
>>>>         free_netdev(dev);
>>>> --
>>>> 1.7.6.5
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
>>
>> --
>> Regards,
>>
>> Zhi Yong Wu



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] net, tun: remove the flow cache
  2013-12-18  2:08       ` Zhi Yong Wu
@ 2013-12-23  7:00         ` Jason Wang
  0 siblings, 0 replies; 12+ messages in thread
From: Jason Wang @ 2013-12-23  7:00 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: netdev, linux-kernel mlist, David S. Miller, Michael S. Tsirkin,
	Zhi Yong Wu

On 12/18/2013 10:08 AM, Zhi Yong Wu wrote:
> On Tue, Dec 17, 2013 at 6:05 PM, Jason Wang <jasowang@redhat.com> wrote:
>> On 12/17/2013 05:13 PM, Zhi Yong Wu wrote:
>>> On Tue, Dec 17, 2013 at 4:49 PM, Jason Wang <jasowang@redhat.com> wrote:
>>>>> On 12/17/2013 03:26 PM, Zhi Yong Wu wrote:
>>>>>>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>>>>>
>>>>>>> The flow cache is an extremely broken concept, and it usually brings up
>>>>>>> growth issues and DoS attacks, so this patch is trying to remove it from
>>>>>>> the tuntap driver, and insteadly use a simpler way for its flow control.
>>>>> NACK.
>>>>>
>>>>> This single function revert does not make sense to me. Since:
>>> IIRC, the tuntap flow cache is only used to save the mapping of skb
>>> packet <-> queue index. My idea only save the queue index in skb_buff
>>> early when skb buffer is filled, not in flow cache as the current
>>> code. This method is actually more simpler and completely doesn't need
>>> any flow cache.
>> Nope. Flow caches record the flow to queues mapping like what most
>> multiqueue nic does. The only difference is tun record it silently while
>> most nic needs driver to tell the mapping.
> Just checked the virtio spec; I seem to have missed the fact that the
> flow cache enables packet steering in mq mode — thanks for your comments.
> But I have some concerns about some of your comments.
>> What your patch does is:
>> - set the queue mapping of skb during tun_get_user(). But most drivers
>> using XPS or processor id to select the real tx queue. So the real txq
>> depends on the cpu that vhost or qemu is running. This setting does not
> Doesn't those drivers invoke netdev_pick_tx() or its counterpart to
> select real tx queue? e.g. tun_select_queue(). or can you say it with
> an example?

See __netdev_pick_tx(), which will call get_xps_queues() to let XPS
choose the txq if there's no ndo_select_queue() method. Even a driver
that has its own implementation will also call __netdev_pick_tx() for
ordinary traffic. A few drivers just use the processor id to select the
txq; see tile_net_select_queue(). Some drivers do this implicitly
through XPS; see ixgbe and virtio-net.

So the value you intend to "teach" the hardware nic may not work for
most of the cases.
> Moreover, how do those drivers know which cpu vhost or qemu is running on?

It does not know, but it can use processor id which is normally where
qemu/vhost is running.
>> have any effect in fact.
>> - the queue mapping of skb were fetched during tun_select_queue(). This
>> value is usually set by a multiqueue nic to record which hardware rxq
>> was this packet came.
> ah? Can you let me know where a mq nic controller set it?

Almost all multiqueue nic set this when it receives a packet. One
example is ixgbe_process_skb_fields().
>> Can you explain how your patch works exactly?
> You have understood it.
>>>>> - You in fact removes the flow steering function in tun. We definitely
>>>>> need something like this to unbreak the TCP performance in a multiqueue
>>> I don't think it will downgrade the TCP perf even in mq guest, but my
>>> idea maybe has better TCP perf, because it doesn't have any cache
>>> table lookup, etc.
>> Did you test and compare the performance numbers? Did you run profiler
>> to see how much does the lookup cost?
> No. As I just said above, I missed that the flow cache can enable packet
> steering. But did you do related perf testing? To be honest, I am
> wondering how much performance the packet steering can improve. It
> also adds a lot of cache lookup cost.

Not a lot, unless there's an unbalanced distribution across the hash
buckets, which is very rare in the common case.

Without it, you may get a huge regression even on a single-stream TCP
netperf test. The flow cache guarantees that packets of a single stream
are not processed by more than one vhost thread/vcpu, which is very
important for TCP performance.
>>>>> guest. Please have a look at the virtio-net driver / virtio sepc for
>>>>> more information.
>>>>> - The total number of flow caches were limited to 4096, so there's no
>>>>> DoS or growth issue.
>>> Can you check why the ipv4 routing cache is removed? maybe i miss
>>> something, if yes, pls correct me. :)
>> The main differences is that the flow caches were best effort. Tun can
>> not store all flows to queue mapping, and even a hardware nic can not do
>> this. If a packet misses the flow cache, it's safe to distribute it
>> randomly or through another method. So the limitation just work.
> Exactly, we can know this from tun_select_queue().
>> Could you please explain the DoS or growth issue you meet here?
>>>>> - The only issue is scalability, but fixing this is not easy. We can
>>>>> just use arrays/indirection table like RSS instead of hash buckets, it
>>>>> saves some time in linear search but has other issues like collision
>>>>> - I've also had a RFC of using aRFS in the past, it also has several
>>>>> drawbacks such as busy looping in the networking hotspot.
>>>>>
>>>>> So in conclusion, we need flow steering in tun, just removing current
>>>>> method does not help. The proper way is to expose several different
>>>>> methods to user and let user to choose the preferable mechanism like
>>>>> packet.
>>> By the way, let us look at what other networking guys think of this,
>>> such as MST, dave, etc. :)
>>>
>> Of course.
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-12-23  7:00 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-17  7:26 [RFC PATCH] net, tun: remove the flow cache Zhi Yong Wu
2013-12-17  7:47 ` Stephen Hemminger
2013-12-17  8:13   ` Zhi Yong Wu
2013-12-17  8:49 ` Jason Wang
2013-12-17  9:13   ` Zhi Yong Wu
2013-12-17 10:05     ` Jason Wang
2013-12-18  2:08       ` Zhi Yong Wu
2013-12-23  7:00         ` Jason Wang
2013-12-18  4:06 ` Tom Herbert
2013-12-18  4:37   ` Zhi Yong Wu
2013-12-18  4:58     ` Tom Herbert
2013-12-18  5:23       ` Zhi Yong Wu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).