netdev.vger.kernel.org archive mirror
* [RFC PATCH net-next 0/3] virtio_net: add aRFS support
@ 2014-01-15 14:20 Zhi Yong Wu
  2014-01-15 14:20 ` [RFC PATCH net-next 1/3] virtio_pci: Introduce one new config api vp_get_vq_irq() Zhi Yong Wu
                   ` (3 more replies)
  0 siblings, 4 replies; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-15 14:20 UTC (permalink / raw)
  To: netdev; +Cc: therbert, edumazet, davem, Zhi Yong Wu

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

Hi, folks

This patchset integrates aRFS (accelerated RFS) support into virtio_net; aRFS
is then used to select the RX queue. Although it is still an RFC and hasn't
been tested, it is being posted early to make sure it is heading in the right
direction. Any comments are appreciated, thanks.

If anyone is interested in playing with it, you can get this patchset from my
dev git on github:
  git://github.com/wuzhy/kernel.git virtnet_rfs

Zhi Yong Wu (3):
  virtio_pci: Introduce one new config api vp_get_vq_irq()
  virtio_net: Introduce one dummy function virtnet_filter_rfs()
  virtio-net: Add accelerated RFS support

 drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
 drivers/virtio/virtio_pci.c   |   11 +++++++
 include/linux/virtio_config.h |   12 +++++++
 3 files changed, 89 insertions(+), 1 deletions(-)

-- 
1.7.6.5

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [RFC PATCH net-next 1/3] virtio_pci: Introduce one new config api vp_get_vq_irq()
  2014-01-15 14:20 [RFC PATCH net-next 0/3] virtio_net: add aRFS support Zhi Yong Wu
@ 2014-01-15 14:20 ` Zhi Yong Wu
  2014-01-15 14:20 ` [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs() Zhi Yong Wu
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-15 14:20 UTC (permalink / raw)
  To: netdev; +Cc: therbert, edumazet, davem, Zhi Yong Wu

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

This config API is used to get the IRQ number of the given virtqueue.

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
 drivers/virtio/virtio_pci.c   |   11 +++++++++++
 include/linux/virtio_config.h |   12 ++++++++++++
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index a37c699..85f1aa6 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -654,6 +654,16 @@ static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 	return 0;
 }
 
+static int vp_get_vq_irq(struct virtio_device *vdev, struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+
+	if (vp_dev->msix_enabled)
+		return vp_dev->msix_entries[info->msix_vector].vector;
+	return -1;
+}
+
 static const struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
@@ -666,6 +676,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
 	.finalize_features = vp_finalize_features,
 	.bus_name	= vp_bus_name,
 	.set_vq_affinity = vp_set_vq_affinity,
+	.get_vq_irq	= vp_get_vq_irq,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index e8f8f71..b70fc47 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -51,6 +51,9 @@
  *      This returns a pointer to the bus name a la pci_name from which
  *      the caller can then copy.
  * @set_vq_affinity: set the affinity for a virtqueue.
+ * @get_vq_irq: get irq of the given virtqueue
+ *	vdev: the virtio_device
+ *	vq: the virtqueue
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -70,6 +73,7 @@ struct virtio_config_ops {
 	void (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
 	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
+	int (*get_vq_irq)(struct virtio_device *vdev, struct virtqueue *vq);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -135,6 +139,14 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
 	return 0;
 }
 
+static inline
+int virtqueue_get_vq_irq(struct virtio_device *vdev, struct virtqueue *vq)
+{
+	if (vdev->config->get_vq_irq)
+		return vdev->config->get_vq_irq(vdev, vq);
+	return -1;
+}
+
 /* Config space accessors. */
 #define virtio_cread(vdev, structname, member, ptr)			\
 	do {								\
-- 
1.7.6.5

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs()
  2014-01-15 14:20 [RFC PATCH net-next 0/3] virtio_net: add aRFS support Zhi Yong Wu
  2014-01-15 14:20 ` [RFC PATCH net-next 1/3] virtio_pci: Introduce one new config api vp_get_vq_irq() Zhi Yong Wu
@ 2014-01-15 14:20 ` Zhi Yong Wu
  2014-01-15 17:54   ` Tom Herbert
  2014-01-15 14:20 ` [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support Zhi Yong Wu
  2014-01-16  4:23 ` [RFC PATCH net-next 0/3] virtio_net: add aRFS support Jason Wang
  3 siblings, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-15 14:20 UTC (permalink / raw)
  To: netdev; +Cc: therbert, edumazet, davem, Zhi Yong Wu

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
 drivers/net/virtio_net.c |   11 +++++++++++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7b17240..046421c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1295,6 +1295,14 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+static int virtnet_filter_rfs(struct net_device *net_dev,
+		const struct sk_buff *skb, u16 rxq_index, u32 flow_id)
+{
+	return 0;
+}
+#endif /* CONFIG_RFS_ACCEL */
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -1309,6 +1317,9 @@ static const struct net_device_ops virtnet_netdev = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer   = virtnet_filter_rfs,
+#endif
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
-- 
1.7.6.5

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-15 14:20 [RFC PATCH net-next 0/3] virtio_net: add aRFS support Zhi Yong Wu
  2014-01-15 14:20 ` [RFC PATCH net-next 1/3] virtio_pci: Introduce one new config api vp_get_vq_irq() Zhi Yong Wu
  2014-01-15 14:20 ` [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs() Zhi Yong Wu
@ 2014-01-15 14:20 ` Zhi Yong Wu
  2014-01-16 21:31   ` Ben Hutchings
  2014-01-16  4:23 ` [RFC PATCH net-next 0/3] virtio_net: add aRFS support Jason Wang
  3 siblings, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-15 14:20 UTC (permalink / raw)
  To: netdev; +Cc: therbert, edumazet, davem, Zhi Yong Wu

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

Accelerated RFS is used to select the RX queue.

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
 drivers/net/virtio_net.c |   56 +++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 55 insertions(+), 1 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 046421c..9f4cfa7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -26,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/cpu_rmap.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -1554,6 +1555,49 @@ err:
 	return ret;
 }
 
+static void virtnet_free_irq_cpu_rmap(struct virtnet_info *vi)
+{
+#ifdef CONFIG_RFS_ACCEL
+	free_irq_cpu_rmap(vi->dev->rx_cpu_rmap);
+	vi->dev->rx_cpu_rmap = NULL;
+#endif
+}
+
+static int virtnet_init_rx_cpu_rmap(struct virtnet_info *vi)
+{
+	int rc = 0;
+
+#ifdef CONFIG_RFS_ACCEL
+	struct virtio_device *vdev = vi->vdev;
+	unsigned int irq;
+	int i;
+
+	if (!vi->affinity_hint_set)
+		goto out;
+
+	vi->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(vi->max_queue_pairs);
+	if (!vi->dev->rx_cpu_rmap) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		irq = virtqueue_get_vq_irq(vdev, vi->rq[i].vq);
+		if (irq == -1)
+			goto failed;
+
+		rc = irq_cpu_rmap_add(vi->dev->rx_cpu_rmap, irq);
+		if (rc) {
+failed:
+			virtnet_free_irq_cpu_rmap(vi);
+			goto out;
+		}
+	}
+out:
+#endif
+	return rc;
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err;
@@ -1671,10 +1715,16 @@ static int virtnet_probe(struct virtio_device *vdev)
 	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
 	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
+	err = virtnet_init_rx_cpu_rmap(vi);
+	if (err) {
+		pr_debug("virtio_net: initializing rx cpu rmap failed\n");
+		goto free_vqs;
+	}
+
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-		goto free_vqs;
+		goto free_cpu_rmap;
 	}
 
 	/* Last of all, set up some receive buffers. */
@@ -1714,6 +1764,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 free_recv_bufs:
 	free_receive_bufs(vi);
 	unregister_netdev(dev);
+free_cpu_rmap:
+	virtnet_free_irq_cpu_rmap(vi);
 free_vqs:
 	cancel_delayed_work_sync(&vi->refill);
 	virtnet_del_vqs(vi);
@@ -1751,6 +1803,8 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	unregister_netdev(vi->dev);
 
+	virtnet_free_irq_cpu_rmap(vi);
+
 	remove_vq_common(vi);
 	if (vi->alloc_frag.page)
 		put_page(vi->alloc_frag.page);
-- 
1.7.6.5

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs()
  2014-01-15 14:20 ` [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs() Zhi Yong Wu
@ 2014-01-15 17:54   ` Tom Herbert
  2014-01-16  2:45     ` Zhi Yong Wu
  0 siblings, 1 reply; 32+ messages in thread
From: Tom Herbert @ 2014-01-15 17:54 UTC (permalink / raw)
  To: Zhi Yong Wu; +Cc: Linux Netdev List, Eric Dumazet, David Miller, Zhi Yong Wu

Zhi, this is promising work! I can't wait to see how this impacts
network virtualization performance :-)

On Wed, Jan 15, 2014 at 6:20 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>
> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> ---
>  drivers/net/virtio_net.c |   11 +++++++++++
>  1 files changed, 11 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7b17240..046421c 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1295,6 +1295,14 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
>         return 0;
>  }
>
> +#ifdef CONFIG_RFS_ACCEL
> +static int virtnet_filter_rfs(struct net_device *net_dev,
> +               const struct sk_buff *skb, u16 rxq_index, u32 flow_id)
> +{
Does this need to be filled out with more stuff?

> +       return 0;
> +}
> +#endif /* CONFIG_RFS_ACCEL */
> +
>  static const struct net_device_ops virtnet_netdev = {
>         .ndo_open            = virtnet_open,
>         .ndo_stop            = virtnet_close,
> @@ -1309,6 +1317,9 @@ static const struct net_device_ops virtnet_netdev = {
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>         .ndo_poll_controller = virtnet_netpoll,
>  #endif
> +#ifdef CONFIG_RFS_ACCEL
> +       .ndo_rx_flow_steer   = virtnet_filter_rfs,
> +#endif
>  };
>
>  static void virtnet_config_changed_work(struct work_struct *work)
> --
> 1.7.6.5
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs()
  2014-01-15 17:54   ` Tom Herbert
@ 2014-01-16  2:45     ` Zhi Yong Wu
  0 siblings, 0 replies; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-16  2:45 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Linux Netdev List, Eric Dumazet, David Miller, Zhi Yong Wu

On Thu, Jan 16, 2014 at 1:54 AM, Tom Herbert <therbert@google.com> wrote:
> Zhi, this is promising work! I can't wait to see how this impacts
"Zhi" is part of my first name, you call me "Zhi Yong".
> network virtualization performance :-)
heh, there is still a lot of work missing.
>
> On Wed, Jan 15, 2014 at 6:20 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> ---
>>  drivers/net/virtio_net.c |   11 +++++++++++
>>  1 files changed, 11 insertions(+), 0 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 7b17240..046421c 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -1295,6 +1295,14 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
>>         return 0;
>>  }
>>
>> +#ifdef CONFIG_RFS_ACCEL
>> +static int virtnet_filter_rfs(struct net_device *net_dev,
>> +               const struct sk_buff *skb, u16 rxq_index, u32 flow_id)
>> +{
> Does this need to be filled out with more stuff?
Yes, the following pieces are still missing:
1.)  the guest virtio_net driver should have a filter table whose entries
can be expired periodically;
2.)  the guest virtio_net driver should pass the rx queue index and filter
info down to the emulated virtio_net NIC in QEMU;
3.)  the emulated virtio_net NIC should have an indirection table that
stores the flow-to-rx-queue mapping;
4.)  the emulated virtio_net NIC should classify each rx packet to the
selected queue by applying the filter;
5.)  the virtio spec should be updated.
Am I missing anything? If yes, please correct me. A rough sketch of the
structures items 1.) and 3.) imply is below.
For 3.) and 4.), do you have any docs about how they are implemented in
physical NICs, e.g. mlx4_en or sfc?
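
Roughly, the data structures items 1.) and 3.) imply could look like the
following (all names here are invented for illustration, nothing of this
exists yet):

/* 1.) guest side: one entry per steered flow, expired periodically */
struct virtnet_rfs_filter {
	u32  flow_id;		/* key reported via ndo_rx_flow_steer() */
	u16  rxq_index;		/* receive virtqueue pair to steer to */
	u16  filter_id;		/* id handed back to the RFS core */
	bool valid;
};

/* 3.) host (QEMU) side: indirection table consulted when the emulated
 * NIC classifies an incoming rx packet to a queue (item 4.) */
typedef struct VirtioNetFlowTable {
	uint32_t size;		/* number of buckets, power of two */
	uint16_t *queue;	/* rx queue index per bucket */
} VirtioNetFlowTable;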

>
>> +       return 0;
>> +}
>> +#endif /* CONFIG_RFS_ACCEL */
>> +
>>  static const struct net_device_ops virtnet_netdev = {
>>         .ndo_open            = virtnet_open,
>>         .ndo_stop            = virtnet_close,
>> @@ -1309,6 +1317,9 @@ static const struct net_device_ops virtnet_netdev = {
>>  #ifdef CONFIG_NET_POLL_CONTROLLER
>>         .ndo_poll_controller = virtnet_netpoll,
>>  #endif
>> +#ifdef CONFIG_RFS_ACCEL
>> +       .ndo_rx_flow_steer   = virtnet_filter_rfs,
>> +#endif
>>  };
>>
>>  static void virtnet_config_changed_work(struct work_struct *work)
>> --
>> 1.7.6.5
>>



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-15 14:20 [RFC PATCH net-next 0/3] virtio_net: add aRFS support Zhi Yong Wu
                   ` (2 preceding siblings ...)
  2014-01-15 14:20 ` [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support Zhi Yong Wu
@ 2014-01-16  4:23 ` Jason Wang
  2014-01-16  8:34   ` Fwd: " Zhi Yong Wu
  2014-01-16  8:48   ` Zhi Yong Wu
  3 siblings, 2 replies; 32+ messages in thread
From: Jason Wang @ 2014-01-16  4:23 UTC (permalink / raw)
  To: Zhi Yong Wu; +Cc: netdev, therbert, edumazet, davem, Zhi Yong Wu

On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>
> HI, folks
>
> The patchset is trying to integrate aRFS support to virtio_net. In this case,
> aRFS will be used to select the RX queue. To make sure that it's going ahead
> in the correct direction, although it is still one RFC and isn't tested, it's
> post out ASAP. Any comment are appreciated, thanks.
>
> If anyone is interested in playing with it, you can get this patchset from my
> dev git on github:
>    git://github.com/wuzhy/kernel.git virtnet_rfs
>
> Zhi Yong Wu (3):
>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>    virtio-net: Add accelerated RFS support
>
>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>   drivers/virtio/virtio_pci.c   |   11 +++++++
>   include/linux/virtio_config.h |   12 +++++++
>   3 files changed, 89 insertions(+), 1 deletions(-)
>

Please run get_maintainer.pl before sending the patch. You should at least 
cc the virtio maintainer/list for this.

The core aRFS method is a no-op in this RFC, which makes the series hard to 
discuss. You should at least describe the big picture in the cover letter. I 
suggest you either post an RFC which can run and shows the expected results, 
or just start a thread for the design discussion.

This method has also been discussed before; you can search for "[net-next RFC 
PATCH 5/5] virtio-net: flow director support" in the netdev archive for a 
very old prototype implemented by me. It works, and it looks like most of 
what this RFC does was already done there.

A basic question is whether we need this at all; not all multiqueue cards 
use aRFS (see ixgbe ATR). And does it bring extra overhead? For virtio we 
want to reduce vmexits as much as possible, but aRFS seems to introduce a 
lot more of them. Making a complex interface just for a virtual device may 
not be good; a simple method may work for most cases.

We really should consider offloading this to the real NIC. VMDq and L2 
forwarding offload may help in this case.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16  4:23 ` [RFC PATCH net-next 0/3] virtio_net: add aRFS support Jason Wang
@ 2014-01-16  8:34   ` Zhi Yong Wu
  2014-01-16  8:52     ` Stefan Hajnoczi
  2014-01-16  8:48   ` Zhi Yong Wu
  1 sibling, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-16  8:34 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell, Jason Wang

CC: stefanha, MST, Rusty Russell

---------- Forwarded message ----------
From: Jason Wang <jasowang@redhat.com>
Date: Thu, Jan 16, 2014 at 12:23 PM
Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
To: Zhi Yong Wu <zwu.kernel@gmail.com>
Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>


On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>
> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>
> HI, folks
>
> The patchset is trying to integrate aRFS support to virtio_net. In this case,
> aRFS will be used to select the RX queue. To make sure that it's going ahead
> in the correct direction, although it is still one RFC and isn't tested, it's
> post out ASAP. Any comment are appreciated, thanks.
>
> If anyone is interested in playing with it, you can get this patchset from my
> dev git on github:
>    git://github.com/wuzhy/kernel.git virtnet_rfs
>
> Zhi Yong Wu (3):
>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>    virtio-net: Add accelerated RFS support
>
>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>   drivers/virtio/virtio_pci.c   |   11 +++++++
>   include/linux/virtio_config.h |   12 +++++++
>   3 files changed, 89 insertions(+), 1 deletions(-)
>

Please run get_maintainer.pl before sending the patch. You should at least
cc the virtio maintainer/list for this.

The core aRFS method is a no-op in this RFC, which makes the series hard to
discuss. You should at least describe the big picture in the cover letter. I
suggest you either post an RFC which can run and shows the expected results,
or just start a thread for the design discussion.

This method has also been discussed before; you can search for "[net-next
RFC PATCH 5/5] virtio-net: flow director support" in the netdev archive
for a very old prototype implemented by me. It works, and it looks like
most of what this RFC does was already done there.

A basic question is whether we need this at all; not all multiqueue cards
use aRFS (see ixgbe ATR). And does it bring extra overhead? For virtio we
want to reduce vmexits as much as possible, but aRFS seems to introduce a
lot more of them. Making a complex interface just for a virtual device may
not be good; a simple method may work for most cases.

We really should consider offloading this to the real NIC. VMDq and L2
forwarding offload may help in this case.


-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16  4:23 ` [RFC PATCH net-next 0/3] virtio_net: add aRFS support Jason Wang
  2014-01-16  8:34   ` Fwd: " Zhi Yong Wu
@ 2014-01-16  8:48   ` Zhi Yong Wu
  2014-01-23 14:26     ` Michael S. Tsirkin
  1 sibling, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-16  8:48 UTC (permalink / raw)
  To: Jason Wang
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Stefan Hajnoczi, Michael S. Tsirkin, Rusty Russell

On Thu, Jan 16, 2014 at 12:23 PM, Jason Wang <jasowang@redhat.com> wrote:
> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>
>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>
>> HI, folks
>>
>> The patchset is trying to integrate aRFS support to virtio_net. In this
>> case,
>> aRFS will be used to select the RX queue. To make sure that it's going
>> ahead
>> in the correct direction, although it is still one RFC and isn't tested,
>> it's
>> post out ASAP. Any comment are appreciated, thanks.
>>
>> If anyone is interested in playing with it, you can get this patchset from
>> my
>> dev git on github:
>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>
>> Zhi Yong Wu (3):
>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>    virtio-net: Add accelerated RFS support
>>
>>   drivers/net/virtio_net.c      |   67
>> ++++++++++++++++++++++++++++++++++++++++-
>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>   include/linux/virtio_config.h |   12 +++++++
>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>
>
> Please run get_maintainter.pl before sending the patch. You'd better at
> least cc virtio maintainer/list for this.
Is this a must for virtio stuff?

>
> The core aRFS method is a noop in this RFC which make this series no much
> sense to discuss. You should at least mention the big picture here in the
> cover letter. I suggest you should post a RFC which can run and has expected
> result or you can just raise a thread for the design discussion.
Yes, it currently misses some important pieces, as I said in another mail
in this series.

>
> And this method has been discussed before, you can search "[net-next RFC
> PATCH 5/5] virtio-net: flow director support" in netdev archive for a very
> old prototype implemented by me. It can work and looks like most of this RFC
> have already done there.
Ah? Can you let me know the result of that discussion? I will check it;
thanks for the pointer.
>
> A basic question is whether or not we need this, not all the mq cards use
> aRFS (see ixgbe ATR). And whether or not it can bring extra overheads? For
> virtio, we want to reduce the vmexits as much as possible but this aRFS
Good question; I also have concerns about this, and I don't know whether
Tom has a good explanation.
> seems introduce a lot of more of this. Making a complex interfaces just for
> an virtual device may not be good, simple method may works for most of the
> cases.
>
> We really should consider to offload this to real nic. VMDq and L2
> forwarding offload may help in this case.

By the way, Stefan, can you let us know your concerns here, as we
discussed on the IRC channel? :)


-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16  8:34   ` Fwd: " Zhi Yong Wu
@ 2014-01-16  8:52     ` Stefan Hajnoczi
  2014-01-16 17:12       ` Tom Herbert
  2014-01-17  3:04       ` Jason Wang
  0 siblings, 2 replies; 32+ messages in thread
From: Stefan Hajnoczi @ 2014-01-16  8:52 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell, Jason Wang

On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
> CC: stefanha, MST, Rusty Russel
> 
> ---------- Forwarded message ----------
> From: Jason Wang <jasowang@redhat.com>
> Date: Thu, Jan 16, 2014 at 12:23 PM
> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
> To: Zhi Yong Wu <zwu.kernel@gmail.com>
> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> 
> 
> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
> >
> > From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
> >
> > HI, folks
> >
> > The patchset is trying to integrate aRFS support to virtio_net. In this case,
> > aRFS will be used to select the RX queue. To make sure that it's going ahead
> > in the correct direction, although it is still one RFC and isn't tested, it's
> > post out ASAP. Any comment are appreciated, thanks.
> >
> > If anyone is interested in playing with it, you can get this patchset from my
> > dev git on github:
> >    git://github.com/wuzhy/kernel.git virtnet_rfs
> >
> > Zhi Yong Wu (3):
> >    virtio_pci: Introduce one new config api vp_get_vq_irq()
> >    virtio_net: Introduce one dummy function virtnet_filter_rfs()
> >    virtio-net: Add accelerated RFS support
> >
> >   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
> >   drivers/virtio/virtio_pci.c   |   11 +++++++
> >   include/linux/virtio_config.h |   12 +++++++
> >   3 files changed, 89 insertions(+), 1 deletions(-)
> >
> 
> Please run get_maintainter.pl before sending the patch. You'd better
> at least cc virtio maintainer/list for this.
> 
> The core aRFS method is a noop in this RFC which make this series no
> much sense to discuss. You should at least mention the big picture
> here in the cover letter. I suggest you should post a RFC which can
> run and has expected result or you can just raise a thread for the
> design discussion.
> 
> And this method has been discussed before, you can search "[net-next
> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
> for a very old prototype implemented by me. It can work and looks like
> most of this RFC have already done there.
> 
> A basic question is whether or not we need this, not all the mq cards
> use aRFS (see ixgbe ATR). And whether or not it can bring extra
> overheads? For virtio, we want to reduce the vmexits as much as
> possible but this aRFS seems introduce a lot of more of this. Making a
> complex interfaces just for an virtual device may not be good, simple
> method may works for most of the cases.
> 
> We really should consider to offload this to real nic. VMDq and L2
> forwarding offload may help in this case.

Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
list - it's still the same concern I had in the old email thread that
Jason mentioned.

In order for virtio-net aRFS to make sense there needs to be an overall
plan for pushing flow mapping information down to the physical NIC.
That's the only way to actually achieve the benefit of steering:
processing the packet on the CPU where the application is running.

If it's not possible or too hard to implement aRFS down the entire
stack, we won't be able to process the packet on the right CPU.
Then we might as well not bother with aRFS and just distribute uniformly
across the rx virtqueues.

Please post an outline of how rx packets will be steered up the stack so
we can discuss whether aRFS can bring any benefit.

Stefan

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16  8:52     ` Stefan Hajnoczi
@ 2014-01-16 17:12       ` Tom Herbert
  2014-01-17  3:26         ` Jason Wang
                           ` (2 more replies)
  2014-01-17  3:04       ` Jason Wang
  1 sibling, 3 replies; 32+ messages in thread
From: Tom Herbert @ 2014-01-16 17:12 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Zhi Yong Wu, Linux Netdev List, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell, Jason Wang

On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>> CC: stefanha, MST, Rusty Russel
>>
>> ---------- Forwarded message ----------
>> From: Jason Wang <jasowang@redhat.com>
>> Date: Thu, Jan 16, 2014 at 12:23 PM
>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>>
>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>> >
>> > From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>> >
>> > HI, folks
>> >
>> > The patchset is trying to integrate aRFS support to virtio_net. In this case,
>> > aRFS will be used to select the RX queue. To make sure that it's going ahead
>> > in the correct direction, although it is still one RFC and isn't tested, it's
>> > post out ASAP. Any comment are appreciated, thanks.
>> >
>> > If anyone is interested in playing with it, you can get this patchset from my
>> > dev git on github:
>> >    git://github.com/wuzhy/kernel.git virtnet_rfs
>> >
>> > Zhi Yong Wu (3):
>> >    virtio_pci: Introduce one new config api vp_get_vq_irq()
>> >    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>> >    virtio-net: Add accelerated RFS support
>> >
>> >   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>> >   drivers/virtio/virtio_pci.c   |   11 +++++++
>> >   include/linux/virtio_config.h |   12 +++++++
>> >   3 files changed, 89 insertions(+), 1 deletions(-)
>> >
>>
>> Please run get_maintainter.pl before sending the patch. You'd better
>> at least cc virtio maintainer/list for this.
>>
>> The core aRFS method is a noop in this RFC which make this series no
>> much sense to discuss. You should at least mention the big picture
>> here in the cover letter. I suggest you should post a RFC which can
>> run and has expected result or you can just raise a thread for the
>> design discussion.
>>
>> And this method has been discussed before, you can search "[net-next
>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>> for a very old prototype implemented by me. It can work and looks like
>> most of this RFC have already done there.
>>
>> A basic question is whether or not we need this, not all the mq cards
>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>> overheads? For virtio, we want to reduce the vmexits as much as
>> possible but this aRFS seems introduce a lot of more of this. Making a
>> complex interfaces just for an virtual device may not be good, simple
>> method may works for most of the cases.
>>
>> We really should consider to offload this to real nic. VMDq and L2
>> forwarding offload may help in this case.
>
Adding flow director support would be a good step; Zhi Yong's patches for
support in tun have been merged, so support in virtio-net would be a good
follow-on. But flow director does have some limitations and performance
issues of its own (forced pairing between TX and RX queues, a lookup on
every TX packet). In the case of virtualization, aRFS, RSS, ntuple
filtering, LRO, etc. can be implemented as software emulations, and so far
they seem to be wins in most cases. Extending these down into the stack so
that they can leverage HW mechanisms is a good goal for best performance.
It's probably generally true that we'll want most of the offloads commonly
available on NICs in the virtualization path. Of course, we need to
demonstrate that they provide a real performance benefit in this use case.

I believe tying aRFS (or flow director) into real aRFS is just a matter of
programming the RFS table properly. This is not the complex side of the
interface; I believe it already works with the tun patches.

> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
> list - it's still the same concern I had in the old email thread that
> Jason mentioned.
>
> In order for virtio-net aRFS to make sense there needs to be an overall
> plan for pushing flow mapping information down to the physical NIC.
> That's the only way to actually achieve the benefit of steering:
> processing the packet on the CPU where the application is running.
>
I don't think this is necessarily true. Per-flow steering among virtual
queues should be beneficial in itself, and virtio-net can leverage RFS or
aRFS where they're available.

> If it's not possible or too hard to implement aRFS down the entire
> stack, we won't be able to process the packet on the right CPU.
> Then we might as well not bother with aRFS and just distribute uniformly
> across the rx virtqueues.
>
> Please post an outline of how rx packets will be steered up the stack so
> we can discuss whether aRFS can bring any benefit.
>
1. The aRFS interface for the guest to specify which virtual queue to
receive a packet on is fairly straightforward (a rough sketch of what the
guest-side hook could look like follows below).
2. To hook into RFS, we need to match the virtual queue to the real CPU it
will be processed on, and then program the RFS table for that flow and CPU.
3. NIC aRFS keys off the RFS tables so it can program the HW with the
correct queue for the CPU.
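
A rough sketch of that guest-side hook, filling in the dummy
virtnet_filter_rfs() from patch 2/3 (untested; VIRTIO_NET_CTRL_FLOW,
VIRTIO_NET_CTRL_FLOW_ADD and virtnet_alloc_filter_id() are invented names,
not existing virtio definitions; only virtnet_send_command() and the
ndo_rx_flow_steer signature are real):

#ifdef CONFIG_RFS_ACCEL
static int virtnet_filter_rfs(struct net_device *net_dev,
			      const struct sk_buff *skb, u16 rxq_index,
			      u32 flow_id)
{
	struct virtnet_info *vi = netdev_priv(net_dev);
	struct scatterlist sg;
	int filter_id;
	struct {
		__u32 flow_id;		/* key; host must derive the same value */
		__u16 rx_queue;		/* receive virtqueue pair to steer to */
	} __packed cmd = {
		.flow_id  = flow_id,
		.rx_queue = rxq_index,
	};

	/* hypothetical helper: remember the flow so a later expiry scan can
	 * call rps_may_expire_flow() with the returned filter id */
	filter_id = virtnet_alloc_filter_id(vi, flow_id, rxq_index);
	if (filter_id < 0)
		return filter_id;

	/* push the mapping to the host (a real version would also free the
	 * filter id again if this fails) */
	sg_init_one(&sg, &cmd, sizeof(cmd));
	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_FLOW,
				  VIRTIO_NET_CTRL_FLOW_ADD, &sg))
		return -EINVAL;

	return filter_id;
}
#endif /* CONFIG_RFS_ACCEL */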

> Stefan

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-15 14:20 ` [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support Zhi Yong Wu
@ 2014-01-16 21:31   ` Ben Hutchings
  2014-01-16 22:00     ` Zhi Yong Wu
  0 siblings, 1 reply; 32+ messages in thread
From: Ben Hutchings @ 2014-01-16 21:31 UTC (permalink / raw)
  To: Zhi Yong Wu; +Cc: netdev, therbert, edumazet, davem, Zhi Yong Wu

On Wed, 2014-01-15 at 22:20 +0800, Zhi Yong Wu wrote:
[...]
> +static int virtnet_init_rx_cpu_rmap(struct virtnet_info *vi)
> +{
> +	int rc = 0;
> +
> +#ifdef CONFIG_RFS_ACCEL
> +	struct virtio_device *vdev = vi->vdev;
> +	unsigned int irq;
> +	int i;
> +
> +	if (!vi->affinity_hint_set)
> +		goto out;
> +
> +	vi->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(vi->max_queue_pairs);
> +	if (!vi->dev->rx_cpu_rmap) {
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
> +		irq = virtqueue_get_vq_irq(vdev, vi->rq[i].vq);
> +		if (irq == -1)
> +			goto failed;

Jumping into an if-statement is confusing.  Also do you really want to
return 0 in this case?

Otherwise this looks fine.

Ben.

> +		rc = irq_cpu_rmap_add(vi->dev->rx_cpu_rmap, irq);
> +		if (rc) {
> +failed:
> +			virtnet_free_irq_cpu_rmap(vi);
> +			goto out;
> +		}
> +	}
> +out:
> +#endif
> +	return rc;
> +}
[...]

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-16 21:31   ` Ben Hutchings
@ 2014-01-16 22:00     ` Zhi Yong Wu
  2014-01-16 23:16       ` Ben Hutchings
  0 siblings, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-16 22:00 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu

On Fri, Jan 17, 2014 at 5:31 AM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Wed, 2014-01-15 at 22:20 +0800, Zhi Yong Wu wrote:
> [...]
>> +static int virtnet_init_rx_cpu_rmap(struct virtnet_info *vi)
>> +{
>> +     int rc = 0;
>> +
>> +#ifdef CONFIG_RFS_ACCEL
>> +     struct virtio_device *vdev = vi->vdev;
>> +     unsigned int irq;
>> +     int i;
>> +
>> +     if (!vi->affinity_hint_set)
>> +             goto out;
>> +
>> +     vi->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(vi->max_queue_pairs);
>> +     if (!vi->dev->rx_cpu_rmap) {
>> +             rc = -ENOMEM;
>> +             goto out;
>> +     }
>> +
>> +     for (i = 0; i < vi->max_queue_pairs; i++) {
>> +             irq = virtqueue_get_vq_irq(vdev, vi->rq[i].vq);
>> +             if (irq == -1)
>> +                     goto failed;
>
> Jumping into an if-statement is confusing.  Also do you really want to
> return 0 in this case?
No. If it fails to get the irq, I want it to exit as soon as possible;
otherwise irq_cpu_rmap_add() would be invoked with an incorrect irq
argument.

By the way, have you thought about whether it makes sense to add aRFS
support to virtio_net? For [patch 2/3], what do you think of the missing
pieces I listed? And do you have any doc you could share on how the
indirection table is implemented in the sfc NIC? Thanks.

>
> Otherwise this looks fine.
>
> Ben.
>
>> +             rc = irq_cpu_rmap_add(vi->dev->rx_cpu_rmap, irq);
>> +             if (rc) {
>> +failed:
>> +                     virtnet_free_irq_cpu_rmap(vi);
>> +                     goto out;
>> +             }
>> +     }
>> +out:
>> +#endif
>> +     return rc;
>> +}
> [...]
>
> --
> Ben Hutchings, Staff Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-16 22:00     ` Zhi Yong Wu
@ 2014-01-16 23:16       ` Ben Hutchings
  2014-01-17 16:54         ` Zhi Yong Wu
  0 siblings, 1 reply; 32+ messages in thread
From: Ben Hutchings @ 2014-01-16 23:16 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu

On Fri, 2014-01-17 at 06:00 +0800, Zhi Yong Wu wrote:
> On Fri, Jan 17, 2014 at 5:31 AM, Ben Hutchings
> <bhutchings@solarflare.com> wrote:
> > On Wed, 2014-01-15 at 22:20 +0800, Zhi Yong Wu wrote:
> > [...]
> >> +static int virtnet_init_rx_cpu_rmap(struct virtnet_info *vi)
> >> +{
> >> +     int rc = 0;
> >> +
> >> +#ifdef CONFIG_RFS_ACCEL
> >> +     struct virtio_device *vdev = vi->vdev;
> >> +     unsigned int irq;
> >> +     int i;
> >> +
> >> +     if (!vi->affinity_hint_set)
> >> +             goto out;
> >> +
> >> +     vi->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(vi->max_queue_pairs);
> >> +     if (!vi->dev->rx_cpu_rmap) {
> >> +             rc = -ENOMEM;
> >> +             goto out;
> >> +     }
> >> +
> >> +     for (i = 0; i < vi->max_queue_pairs; i++) {
> >> +             irq = virtqueue_get_vq_irq(vdev, vi->rq[i].vq);
> >> +             if (irq == -1)
> >> +                     goto failed;
> >
> > Jumping into an if-statement is confusing.  Also do you really want to
> > return 0 in this case?
> No, If it fail to get irq, i want it to exit as soon as possible,
> otherwise it will cause irq_cpu_rmap_add() to be invoked with one
> incorrect argument irq.

Well currently this goto does result in returning 0, as rc has not been
changed after its initialisation to 0.

> By the way, do you have thought about if it makes sense to add aRFS
> support to virtio_net? For [patch 2/3], what do you think of those
> missing stuff listed by me?
> For how indirect table is implemented in sfc NIC, do you have any doc
> to share with  me? thanks.

Going through that list:

> 1.)  guest virtio_net driver should have one filter table and its
> entries can be expired periodically;

In sfc, we keep a count of how many entries have been inserted in each NAPI
context.  Whenever the NAPI poll function is about to call
napi_complete() and the count for that context has reached a trigger
level, it will scan some quota of filter entries for expiry.
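
That pattern, transplanted onto hypothetical virtio_net names, would look
roughly like this (the rfs_* fields, VIRTNET_RFS_FILTER_CNT and
virtnet_del_flow() are invented; rps_may_expire_flow() is the real kernel
helper; untested):

/* called from the NAPI poll routine just before napi_complete(), once the
 * per-context insertion count passes a trigger level */
static void virtnet_rfs_expire(struct virtnet_info *vi, u16 rxq_index,
			       unsigned int quota)
{
	struct receive_queue *rq = &vi->rq[rxq_index];
	unsigned int i = rq->rfs_expire_index;

	while (quota--) {
		struct virtnet_rfs_filter *f = &rq->rfs_filters[i];

		if (f->valid &&
		    rps_may_expire_flow(vi->dev, rxq_index,
					f->flow_id, f->filter_id)) {
			virtnet_del_flow(vi, f);	/* tell the host to drop it */
			f->valid = false;
		}
		i = (i + 1) & (VIRTNET_RFS_FILTER_CNT - 1);
	}
	rq->rfs_expire_index = i;
}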

> 2.)  guest virtio_net driver should pass rx queue index and filter
> info down to the emulated virtio_net NIC in QEMU.
> 3.) the emulated virtio_net NIC should have its indirect table to
> store the flow to rx queue mapping.
> 4.) the emulated virtio_net NIC should classify the rx packet to
> selected queue by applying the filter.

I think the most efficient way to do this would be to put a hash table
in some shared memory that both guest and host can read and write.  The
virtio control path would only be used to set up and tear down the
table.  I don't know whether virtio allows for that.

However, to take advantage of ARFS on a physical net driver, it would be
necessary to send a control request for part 2.

> 5.) update virtio spec.
> Do i miss anything? If yes, please correct me.
> For 3.) and 4.), do you have any doc about how they are implemented in
> physical NICs? e.g. mlx4_en or sfc, etc.

The Programmer's Reference Manuals for Solarflare controllers are only
available under NDA.  I can describe the hardware filtering briefly, but
actually I don't think it's very relevant to virtio_net.

There is a typical RSS hash indirection table (128 entries), but for
ARFS we use a different RX filter table which has 8K entries
(RX_FILTER_TBL0 on SFC4000/SFC9000 family).

Solarflare controllers support user-level networking, which requires
perfect filtering to deliver each application's flows into that
application's dedicated RX queue(s).  Lookups in this larger filter
table are still hash-based, but each entry specifies a TCP/IP or UDP/IP
4-tuple or local 2-tuple to match.  ARFS uses the 4-tuple type only.

To allow for hash collisions, a secondary hash function generates an
increment to be added to the initial table index repeatedly for hash
chaining.  There is a control register which tells the controller the
maximum hash chain length to search for each IP filter type; after this
it will fall back to checking MAC filters and then default filters.
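
For what it's worth, the probing pattern described above looks roughly like
this (purely illustrative; none of these names come from the PRM):

static int filter_lookup(const struct filter_table *tbl,
			 const struct filter_spec *spec)
{
	unsigned int idx  = primary_hash(spec) & (tbl->size - 1);
	unsigned int incr = secondary_hash(spec) | 1;	/* odd step visits every slot */
	unsigned int depth;

	for (depth = 0; depth < tbl->max_chain_len; depth++) {
		if (filter_match(&tbl->entries[idx], spec))
			return idx;		/* perfect 4-tuple match */
		idx = (idx + incr) & (tbl->size - 1);
	}
	return -ENOENT;		/* give up; fall back to MAC/default filters */
}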

On the SFC9100 family, filter updates and lookups are implemented by
firmware and the driver doesn't manage the filter table itself, but I
know it is still a hash table of perfect filters.

For ARFS, perfect filtering is not needed.  I think it would be
preferable to use a fairly big hash table and make insertion fail in
case of a collision.  Since the backend for virtio_net will do RX queue
selection in software, the entire table of queue indices should fit into
its L1 cache.
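
As a sketch of that on the virtio_net backend (hypothetical, nothing like
this exists yet): with, say, 8K 16-bit queue indices the whole table is only
16KB.

#define FLOW_TABLE_SIZE	8192		/* power of two */
#define NO_QUEUE	0xffff		/* slot is empty */

/* insertion simply fails on a collision instead of chaining */
static int flow_table_insert(uint16_t *queue_tbl, uint32_t hash,
			     uint16_t queue)
{
	uint32_t idx = hash & (FLOW_TABLE_SIZE - 1);

	if (queue_tbl[idx] != NO_QUEUE && queue_tbl[idx] != queue)
		return -1;		/* another flow already owns the slot */
	queue_tbl[idx] = queue;
	return 0;
}

/* rx classification: NO_QUEUE means fall back to the default steering */
static uint16_t flow_table_lookup(const uint16_t *queue_tbl, uint32_t hash)
{
	return queue_tbl[hash & (FLOW_TABLE_SIZE - 1)];
}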

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16  8:52     ` Stefan Hajnoczi
  2014-01-16 17:12       ` Tom Herbert
@ 2014-01-17  3:04       ` Jason Wang
  2014-01-20 16:49         ` Stefan Hajnoczi
  1 sibling, 1 reply; 32+ messages in thread
From: Jason Wang @ 2014-01-17  3:04 UTC (permalink / raw)
  To: Stefan Hajnoczi, Zhi Yong Wu
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On 01/16/2014 04:52 PM, Stefan Hajnoczi wrote:
> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>> CC: stefanha, MST, Rusty Russel
>>
>> ---------- Forwarded message ----------
>> From: Jason Wang <jasowang@redhat.com>
>> Date: Thu, Jan 16, 2014 at 12:23 PM
>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>>
>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>>
>>> HI, folks
>>>
>>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>> aRFS will be used to select the RX queue. To make sure that it's going ahead
>>> in the correct direction, although it is still one RFC and isn't tested, it's
>>> post out ASAP. Any comment are appreciated, thanks.
>>>
>>> If anyone is interested in playing with it, you can get this patchset from my
>>> dev git on github:
>>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>>
>>> Zhi Yong Wu (3):
>>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>>    virtio-net: Add accelerated RFS support
>>>
>>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>>   include/linux/virtio_config.h |   12 +++++++
>>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>>
>> Please run get_maintainter.pl before sending the patch. You'd better
>> at least cc virtio maintainer/list for this.
>>
>> The core aRFS method is a noop in this RFC which make this series no
>> much sense to discuss. You should at least mention the big picture
>> here in the cover letter. I suggest you should post a RFC which can
>> run and has expected result or you can just raise a thread for the
>> design discussion.
>>
>> And this method has been discussed before, you can search "[net-next
>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>> for a very old prototype implemented by me. It can work and looks like
>> most of this RFC have already done there.
>>
>> A basic question is whether or not we need this, not all the mq cards
>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>> overheads? For virtio, we want to reduce the vmexits as much as
>> possible but this aRFS seems introduce a lot of more of this. Making a
>> complex interfaces just for an virtual device may not be good, simple
>> method may works for most of the cases.
>>
>> We really should consider to offload this to real nic. VMDq and L2
>> forwarding offload may help in this case.
> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
> list - it's still the same concern I had in the old email thread that
> Jason mentioned.
>
> In order for virtio-net aRFS to make sense there needs to be an overall
> plan for pushing flow mapping information down to the physical NIC.
> That's the only way to actually achieve the benefit of steering:
> processing the packet on the CPU where the application is running.

There are several places where the packet needs processing:
1) If you mean delivering the virtio-net interrupt to the vcpu the
application is running on, current virtio-net already has this ability with
the help of tun's flow caches and the vcpu-private queue configuration done
via XPS and the irq affinity hint, which the driver sets up automatically
itself.
2) If you mean delivering the interrupt, or running the softirq, of the host
card on the cpu where the vhost thread is running, the recent RFS support in
tun can do this.
3) As for the affinity of the vhost thread and the vcpu thread, this seems
beyond the scope of aRFS.

So it looks like the current code works (though it may have some drawbacks).

aRFS is just another method of doing 1), by letting the driver control the
flow-to-queue mapping. Currently tun does this silently and does not expose
an API for userspace to configure it. The question is whether we need this
and, if yes, how to implement it efficiently.
>
> If it's not possible or too hard to implement aRFS down the entire
> stack, we won't be able to process the packet on the right CPU.
> Then we might as well not bother with aRFS and just distribute uniformly
> across the rx virtqueues.
>
> Please post an outline of how rx packets will be steered up the stack so
> we can discuss whether aRFS can bring any benefit.
>
> Stefan

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16 17:12       ` Tom Herbert
@ 2014-01-17  3:26         ` Jason Wang
  2014-01-17  5:08           ` Tom Herbert
  2014-01-17  5:22         ` Stefan Hajnoczi
  2014-01-22 13:27         ` Zhi Yong Wu
  2 siblings, 1 reply; 32+ messages in thread
From: Jason Wang @ 2014-01-17  3:26 UTC (permalink / raw)
  To: Tom Herbert, Stefan Hajnoczi
  Cc: Zhi Yong Wu, Linux Netdev List, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On 01/17/2014 01:12 AM, Tom Herbert wrote:
> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>>> CC: stefanha, MST, Rusty Russel
>>>
>>> ---------- Forwarded message ----------
>>> From: Jason Wang <jasowang@redhat.com>
>>> Date: Thu, Jan 16, 2014 at 12:23 PM
>>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>
>>>
>>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>>>
>>>> HI, folks
>>>>
>>>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>>> aRFS will be used to select the RX queue. To make sure that it's going ahead
>>>> in the correct direction, although it is still one RFC and isn't tested, it's
>>>> post out ASAP. Any comment are appreciated, thanks.
>>>>
>>>> If anyone is interested in playing with it, you can get this patchset from my
>>>> dev git on github:
>>>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>>>
>>>> Zhi Yong Wu (3):
>>>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>>>    virtio-net: Add accelerated RFS support
>>>>
>>>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>>>   include/linux/virtio_config.h |   12 +++++++
>>>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>>>
>>> Please run get_maintainter.pl before sending the patch. You'd better
>>> at least cc virtio maintainer/list for this.
>>>
>>> The core aRFS method is a noop in this RFC which make this series no
>>> much sense to discuss. You should at least mention the big picture
>>> here in the cover letter. I suggest you should post a RFC which can
>>> run and has expected result or you can just raise a thread for the
>>> design discussion.
>>>
>>> And this method has been discussed before, you can search "[net-next
>>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>>> for a very old prototype implemented by me. It can work and looks like
>>> most of this RFC have already done there.
>>>
>>> A basic question is whether or not we need this, not all the mq cards
>>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>>> overheads? For virtio, we want to reduce the vmexits as much as
>>> possible but this aRFS seems introduce a lot of more of this. Making a
>>> complex interfaces just for an virtual device may not be good, simple
>>> method may works for most of the cases.
>>>
>>> We really should consider to offload this to real nic. VMDq and L2
>>> forwarding offload may help in this case.
> Adding flow director support would be a good step, Zhi's patches for
> support in tun have been merged, so support in virtio-net would be a
> good follow on. But, flow-director does have some limitations and
> performance issues of it's own (forced pairing between TX and RX
> queues, lookup on every TX packet). 

True. But the pairing was designed to work without guest involvement, since
we really want to reduce vmexits from the guest. And the lookup on every TX
packet could be relaxed to one every N packets. But I agree that exposing
the API to the guest may bring a lot of flexibility.
> In the case of virtualization,
> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
> emulations and so far seems to be wins in most cases. Extending these
> down into the stack so that they can leverage HW mechanisms is a good
> goal for best performance. It's probably generally true that most of
> the offloads commonly available for NICs we'll want in virtualization
> path. Of course, we need to deomonstrate that they provide real
> performance benefit in this use case.

Yes, we need a prototype to see how much it can help.
>
> I believe tying in aRFS (or flow director) into a real aRFS is just a
> matter of programming the RFS table properly. This is not the complex
> side of the interface, I believe this already works with the tun
> patches.

Right, what we may need is:

- new tun ioctls for qemu to add or remove a flow
- a new virtqueue command for the guest driver to add or remove a flow
(btw, the current control virtqueue is really slow, we may need to improve
it); a sketch of what such a command could look like is below
- an agreement between host and guest to use the same hash method, or just
computing a software hash in the host and passing it to the guest (which
needs an extra API)
- changing the guest driver to use aRFS

Some of the above has already been implemented in my old RFC.
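
Purely as illustration of the virtqueue command item, a layout could look
like this (the class/command values and the struct are invented here, they
are not in the virtio spec):

#define VIRTIO_NET_CTRL_FLOW		6	/* invented class number */
 #define VIRTIO_NET_CTRL_FLOW_ADD	0
 #define VIRTIO_NET_CTRL_FLOW_DEL	1

struct virtio_net_ctrl_flow {
	__u32 flow_hash;	/* hash computed with the agreed method */
	__u16 rx_queue;		/* receive virtqueue pair to steer to */
	__u16 padding;
};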
>
>> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
>> list - it's still the same concern I had in the old email thread that
>> Jason mentioned.
>>
>> In order for virtio-net aRFS to make sense there needs to be an overall
>> plan for pushing flow mapping information down to the physical NIC.
>> That's the only way to actually achieve the benefit of steering:
>> processing the packet on the CPU where the application is running.
>>
> I don't think this is necessarily true. Per flow steering amongst
> virtual queues should be beneficial in itself. virtio-net can leverage
> RFS or aRFS where it's available.
>
>> If it's not possible or too hard to implement aRFS down the entire
>> stack, we won't be able to process the packet on the right CPU.
>> Then we might as well not bother with aRFS and just distribute uniformly
>> across the rx virtqueues.
>>
>> Please post an outline of how rx packets will be steered up the stack so
>> we can discuss whether aRFS can bring any benefit.
>>
> 1. The aRFS interface for the guest to specify which virtual queue to
> receive a packet on is fairly straight forward.
> 2. To hook into RFS, we need to match the virtual queue to the real
> CPU it will processed on, and then program the RFS table for that flow
> and CPU.
> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
> correct queue for the CPU.
>
>> Stefan
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-17  3:26         ` Jason Wang
@ 2014-01-17  5:08           ` Tom Herbert
  2014-01-17  6:36             ` Jason Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Tom Herbert @ 2014-01-17  5:08 UTC (permalink / raw)
  To: Jason Wang
  Cc: Stefan Hajnoczi, Zhi Yong Wu, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On Thu, Jan 16, 2014 at 7:26 PM, Jason Wang <jasowang@redhat.com> wrote:
> On 01/17/2014 01:12 AM, Tom Herbert wrote:
>> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>>>> CC: stefanha, MST, Rusty Russel
>>>>
>>>> ---------- Forwarded message ----------
>>>> From: Jason Wang <jasowang@redhat.com>
>>>> Date: Thu, Jan 16, 2014 at 12:23 PM
>>>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>>>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>>>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>>>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>>
>>>>
>>>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>>>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>>>>
>>>>> HI, folks
>>>>>
>>>>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>>>> aRFS will be used to select the RX queue. To make sure that it's going ahead
>>>>> in the correct direction, although it is still one RFC and isn't tested, it's
>>>>> post out ASAP. Any comment are appreciated, thanks.
>>>>>
>>>>> If anyone is interested in playing with it, you can get this patchset from my
>>>>> dev git on github:
>>>>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>>>>
>>>>> Zhi Yong Wu (3):
>>>>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>>>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>>>>    virtio-net: Add accelerated RFS support
>>>>>
>>>>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>>>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>>>>   include/linux/virtio_config.h |   12 +++++++
>>>>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>>>>
>>>> Please run get_maintainter.pl before sending the patch. You'd better
>>>> at least cc virtio maintainer/list for this.
>>>>
>>>> The core aRFS method is a noop in this RFC which make this series no
>>>> much sense to discuss. You should at least mention the big picture
>>>> here in the cover letter. I suggest you should post a RFC which can
>>>> run and has expected result or you can just raise a thread for the
>>>> design discussion.
>>>>
>>>> And this method has been discussed before, you can search "[net-next
>>>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>>>> for a very old prototype implemented by me. It can work and looks like
>>>> most of this RFC have already done there.
>>>>
>>>> A basic question is whether or not we need this, not all the mq cards
>>>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>>>> overheads? For virtio, we want to reduce the vmexits as much as
>>>> possible but this aRFS seems introduce a lot of more of this. Making a
>>>> complex interfaces just for an virtual device may not be good, simple
>>>> method may works for most of the cases.
>>>>
>>>> We really should consider to offload this to real nic. VMDq and L2
>>>> forwarding offload may help in this case.
>> Adding flow director support would be a good step, Zhi's patches for
>> support in tun have been merged, so support in virtio-net would be a
>> good follow on. But, flow-director does have some limitations and
>> performance issues of it's own (forced pairing between TX and RX
>> queues, lookup on every TX packet).
>
> True. But the pairing was designed to work without guest involving since
> we really want to reduce the vmexits from guest. And lookup on every TX
> packets could be released to every N packets. But I agree exposing the
> API to guest may bring lots of flexibility.
>> In the case of virtualization,
>> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
>> emulations and so far seems to be wins in most cases. Extending these
>> down into the stack so that they can leverage HW mechanisms is a good
>> goal for best performance. It's probably generally true that most of
>> the offloads commonly available for NICs we'll want in virtualization
>> path. Of course, we need to deomonstrate that they provide real
>> performance benefit in this use case.
>
> Yes, we need a prototype to see how much it can help.
>>
>> I believe tying in aRFS (or flow director) into a real aRFS is just a
>> matter of programming the RFS table properly. This is not the complex
>> side of the interface, I believe this already works with the tun
>> patches.
>
> Right, what we may needs is
>
> - exposing new tun ioctls for qemu adding or removing a flow
> - new virtqueue command for guest driver to adding or removing a flow
> (btw, current control virtqueue is really slow, we may need to improve it).
> - an agreement of host and guest to use the same hash method, or just
> compute software hash in host and pass it to guest (which needs extra
> API to do)

The model for getting the RX hash from a device is well known; the guest
can use it to reflect flow information back to the host, and for
performance we might piggyback RX queue selection on the TX descriptors
of a flow. There are probably some limitations with real HW, but I
assume it would have fewer issues in SW.

IMO, if we have flow state on the host we should *never* need to
perform any hash computation on TX (a host is not a switch :-) ); we
may want to keep some mirrored flow state in the kernel for these
flows, indexed by the hash provided in TX.
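For illustration, a minimal sketch of such a mirrored table (all names
and sizes are invented here, not an existing interface) might look like:

/* Host-side mirror of guest flow decisions, keyed by the rxhash the
 * guest echoes back on TX.  No hash is computed on the host; the
 * guest-provided value is used directly.
 */
#include <stdint.h>

#define MIRROR_SIZE 4096		/* power of two */

struct mirror_entry {
	uint32_t rxhash;	/* hash echoed by the guest on TX */
	uint16_t rx_vq;		/* guest RX virtqueue for this flow */
	uint8_t  valid;
};

static struct mirror_entry mirror[MIRROR_SIZE];

/* Called when a TX descriptor carries (rxhash, rx_vq) from the guest. */
static void mirror_update(uint32_t rxhash, uint16_t rx_vq)
{
	struct mirror_entry *e = &mirror[rxhash & (MIRROR_SIZE - 1)];

	e->rxhash = rxhash;
	e->rx_vq = rx_vq;
	e->valid = 1;
}

/* RX path: pick the guest queue without computing any hash. */
static int mirror_lookup(uint32_t rxhash, uint16_t *rx_vq)
{
	struct mirror_entry *e = &mirror[rxhash & (MIRROR_SIZE - 1)];

	if (!e->valid || e->rxhash != rxhash)
		return -1;	/* unknown flow: fall back to default steering */
	*rx_vq = e->rx_vq;
	return 0;
}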

> - change guest driver to use aRFS
>
> Some of the above has been implemented in my old RFC.

Looks pretty similar to Zhi's tun work. Are you planning to refresh
those patches?

>>
>>> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
>>> list - it's still the same concern I had in the old email thread that
>>> Jason mentioned.
>>>
>>> In order for virtio-net aRFS to make sense there needs to be an overall
>>> plan for pushing flow mapping information down to the physical NIC.
>>> That's the only way to actually achieve the benefit of steering:
>>> processing the packet on the CPU where the application is running.
>>>
>> I don't think this is necessarily true. Per flow steering amongst
>> virtual queues should be beneficial in itself. virtio-net can leverage
>> RFS or aRFS where it's available.
>>
>>> If it's not possible or too hard to implement aRFS down the entire
>>> stack, we won't be able to process the packet on the right CPU.
>>> Then we might as well not bother with aRFS and just distribute uniformly
>>> across the rx virtqueues.
>>>
>>> Please post an outline of how rx packets will be steered up the stack so
>>> we can discuss whether aRFS can bring any benefit.
>>>
>> 1. The aRFS interface for the guest to specify which virtual queue to
>> receive a packet on is fairly straight forward.
>> 2. To hook into RFS, we need to match the virtual queue to the real
>> CPU it will processed on, and then program the RFS table for that flow
>> and CPU.
>> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
>> correct queue for the CPU.
>>
>>> Stefan
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16 17:12       ` Tom Herbert
  2014-01-17  3:26         ` Jason Wang
@ 2014-01-17  5:22         ` Stefan Hajnoczi
  2014-01-17  6:45           ` Jason Wang
  2014-01-20 14:36           ` Ben Hutchings
  2014-01-22 13:27         ` Zhi Yong Wu
  2 siblings, 2 replies; 32+ messages in thread
From: Stefan Hajnoczi @ 2014-01-17  5:22 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Zhi Yong Wu, Linux Netdev List, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell, Jason Wang

On Thu, Jan 16, 2014 at 09:12:29AM -0800, Tom Herbert wrote:
> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
> >> CC: stefanha, MST, Rusty Russel
> >>
> >> ---------- Forwarded message ----------
> >> From: Jason Wang <jasowang@redhat.com>
> >> Date: Thu, Jan 16, 2014 at 12:23 PM
> >> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
> >> To: Zhi Yong Wu <zwu.kernel@gmail.com>
> >> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
> >> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> >>
> >>
> >> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
> >> >
> >> > From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
> >> >
> >> > HI, folks
> >> >
> >> > The patchset is trying to integrate aRFS support to virtio_net. In this case,
> >> > aRFS will be used to select the RX queue. To make sure that it's going ahead
> >> > in the correct direction, although it is still one RFC and isn't tested, it's
> >> > post out ASAP. Any comment are appreciated, thanks.
> >> >
> >> > If anyone is interested in playing with it, you can get this patchset from my
> >> > dev git on github:
> >> >    git://github.com/wuzhy/kernel.git virtnet_rfs
> >> >
> >> > Zhi Yong Wu (3):
> >> >    virtio_pci: Introduce one new config api vp_get_vq_irq()
> >> >    virtio_net: Introduce one dummy function virtnet_filter_rfs()
> >> >    virtio-net: Add accelerated RFS support
> >> >
> >> >   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
> >> >   drivers/virtio/virtio_pci.c   |   11 +++++++
> >> >   include/linux/virtio_config.h |   12 +++++++
> >> >   3 files changed, 89 insertions(+), 1 deletions(-)
> >> >
> >>
> >> Please run get_maintainter.pl before sending the patch. You'd better
> >> at least cc virtio maintainer/list for this.
> >>
> >> The core aRFS method is a noop in this RFC which make this series no
> >> much sense to discuss. You should at least mention the big picture
> >> here in the cover letter. I suggest you should post a RFC which can
> >> run and has expected result or you can just raise a thread for the
> >> design discussion.
> >>
> >> And this method has been discussed before, you can search "[net-next
> >> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
> >> for a very old prototype implemented by me. It can work and looks like
> >> most of this RFC have already done there.
> >>
> >> A basic question is whether or not we need this, not all the mq cards
> >> use aRFS (see ixgbe ATR). And whether or not it can bring extra
> >> overheads? For virtio, we want to reduce the vmexits as much as
> >> possible but this aRFS seems introduce a lot of more of this. Making a
> >> complex interfaces just for an virtual device may not be good, simple
> >> method may works for most of the cases.
> >>
> >> We really should consider to offload this to real nic. VMDq and L2
> >> forwarding offload may help in this case.
> >
> Adding flow director support would be a good step, Zhi's patches for
> support in tun have been merged, so support in virtio-net would be a
> good follow on. But, flow-director does have some limitations and
> performance issues of it's own (forced pairing between TX and RX
> queues, lookup on every TX packet). In the case of virtualization,
> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
> emulations and so far seems to be wins in most cases. Extending these
> down into the stack so that they can leverage HW mechanisms is a good
> goal for best performance. It's probably generally true that most of
> the offloads commonly available for NICs we'll want in virtualization
> path. Of course, we need to deomonstrate that they provide real
> performance benefit in this use case.
> 
> I believe tying in aRFS (or flow director) into a real aRFS is just a
> matter of programming the RFS table properly. This is not the complex
> side of the interface, I believe this already works with the tun
> patches.
> 
> > Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
> > list - it's still the same concern I had in the old email thread that
> > Jason mentioned.
> >
> > In order for virtio-net aRFS to make sense there needs to be an overall
> > plan for pushing flow mapping information down to the physical NIC.
> > That's the only way to actually achieve the benefit of steering:
> > processing the packet on the CPU where the application is running.
> >
> I don't think this is necessarily true. Per flow steering amongst
> virtual queues should be beneficial in itself. virtio-net can leverage
> RFS or aRFS where it's available.

I guess we need to see benchmark results :)

> > If it's not possible or too hard to implement aRFS down the entire
> > stack, we won't be able to process the packet on the right CPU.
> > Then we might as well not bother with aRFS and just distribute uniformly
> > across the rx virtqueues.
> >
> > Please post an outline of how rx packets will be steered up the stack so
> > we can discuss whether aRFS can bring any benefit.
> >
> 1. The aRFS interface for the guest to specify which virtual queue to
> receive a packet on is fairly straight forward.
> 2. To hook into RFS, we need to match the virtual queue to the real
> CPU it will processed on, and then program the RFS table for that flow
> and CPU.
> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
> correct queue for the CPU.

There are a lot of details that are not yet worked out:

If you want to implement aRFS down the vhost_net + macvtap path
(probably the easiest?), how will Step 2 work?  Do the necessary kernel
interfaces exist to take the flow information in vhost_net, pass it to
macvtap, and finally push it down to the physical NIC?

Not sure if aRFS will work down the full stack with vhost_net + tap +
bridge.  Any ideas?

At the QEMU level it is currently pointless to implement virtio-net aRFS
emulation since the QEMU global mutex is taken and virtio-net emulation
is not multi-threaded.

I think aRFS is a good thing; we just need to see performance results
and to know that this won't be a dead end after merging changes to
virtio-net and the virtio specification.

Stefan

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-17  5:08           ` Tom Herbert
@ 2014-01-17  6:36             ` Jason Wang
  2014-01-17 16:03               ` Tom Herbert
  0 siblings, 1 reply; 32+ messages in thread
From: Jason Wang @ 2014-01-17  6:36 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Stefan Hajnoczi, Zhi Yong Wu, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On 01/17/2014 01:08 PM, Tom Herbert wrote:
> On Thu, Jan 16, 2014 at 7:26 PM, Jason Wang <jasowang@redhat.com> wrote:
>> On 01/17/2014 01:12 AM, Tom Herbert wrote:
>>> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>>> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>>>>> CC: stefanha, MST, Rusty Russel
>>>>>
>>>>> ---------- Forwarded message ----------
>>>>> From: Jason Wang <jasowang@redhat.com>
>>>>> Date: Thu, Jan 16, 2014 at 12:23 PM
>>>>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>>>>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>>>>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>>>>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>>>
>>>>>
>>>>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>>>>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>>>>>
>>>>>> HI, folks
>>>>>>
>>>>>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>>>>> aRFS will be used to select the RX queue. To make sure that it's going ahead
>>>>>> in the correct direction, although it is still one RFC and isn't tested, it's
>>>>>> post out ASAP. Any comment are appreciated, thanks.
>>>>>>
>>>>>> If anyone is interested in playing with it, you can get this patchset from my
>>>>>> dev git on github:
>>>>>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>>>>>
>>>>>> Zhi Yong Wu (3):
>>>>>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>>>>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>>>>>    virtio-net: Add accelerated RFS support
>>>>>>
>>>>>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>>>>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>>>>>   include/linux/virtio_config.h |   12 +++++++
>>>>>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>>>>>
>>>>> Please run get_maintainter.pl before sending the patch. You'd better
>>>>> at least cc virtio maintainer/list for this.
>>>>>
>>>>> The core aRFS method is a noop in this RFC which make this series no
>>>>> much sense to discuss. You should at least mention the big picture
>>>>> here in the cover letter. I suggest you should post a RFC which can
>>>>> run and has expected result or you can just raise a thread for the
>>>>> design discussion.
>>>>>
>>>>> And this method has been discussed before, you can search "[net-next
>>>>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>>>>> for a very old prototype implemented by me. It can work and looks like
>>>>> most of this RFC have already done there.
>>>>>
>>>>> A basic question is whether or not we need this, not all the mq cards
>>>>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>>>>> overheads? For virtio, we want to reduce the vmexits as much as
>>>>> possible but this aRFS seems introduce a lot of more of this. Making a
>>>>> complex interfaces just for an virtual device may not be good, simple
>>>>> method may works for most of the cases.
>>>>>
>>>>> We really should consider to offload this to real nic. VMDq and L2
>>>>> forwarding offload may help in this case.
>>> Adding flow director support would be a good step, Zhi's patches for
>>> support in tun have been merged, so support in virtio-net would be a
>>> good follow on. But, flow-director does have some limitations and
>>> performance issues of it's own (forced pairing between TX and RX
>>> queues, lookup on every TX packet).
>> True. But the pairing was designed to work without guest involving since
>> we really want to reduce the vmexits from guest. And lookup on every TX
>> packets could be released to every N packets. But I agree exposing the
>> API to guest may bring lots of flexibility.
>>> In the case of virtualization,
>>> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
>>> emulations and so far seems to be wins in most cases. Extending these
>>> down into the stack so that they can leverage HW mechanisms is a good
>>> goal for best performance. It's probably generally true that most of
>>> the offloads commonly available for NICs we'll want in virtualization
>>> path. Of course, we need to deomonstrate that they provide real
>>> performance benefit in this use case.
>> Yes, we need a prototype to see how much it can help.
>>> I believe tying in aRFS (or flow director) into a real aRFS is just a
>>> matter of programming the RFS table properly. This is not the complex
>>> side of the interface, I believe this already works with the tun
>>> patches.
>> Right, what we may needs is
>>
>> - exposing new tun ioctls for qemu adding or removing a flow
>> - new virtqueue command for guest driver to adding or removing a flow
>> (btw, current control virtqueue is really slow, we may need to improve it).
>> - an agreement of host and guest to use the same hash method, or just
>> compute software hash in host and pass it to guest (which needs extra
>> API to do)
> The model to get RX hash from a device is well known, the guest can
> use that to reflect information about a flow back to the host, and for
> performance we might piggyback RX queue selection on the TX
> descriptors of a flow. Probably some limitations with real HW, but I
> assume would have less issues in SW.

It may work, but it may require extending the current virtio-net TX
descriptor or an extra API such as the vnet header.
>
> IMO, if we have a flow state on the host we should *never* need to
> perform any hash computation on TX (a host is not a switch :-) ), we
> may want to have some mirrored flow state in the kernel for these
> flows which are indexed by the hash provided in TX.

The problem is that the host may have several different types of cards,
so it is not guaranteed that they all provide the same rxhash.
>
>> - change guest driver to use aRFS
>>
>> Some of the above has been implemented in my old RFC.
> Looks pretty similar to Zhi's tun work. Are you planning to refresh
> those patches?

I plan to. But there's another concern:

During my testing (and in tests by some IBM engineers in the past), we
found it's better for a single vhost thread to handle both rx and tx
for a single flow. Using two different vhost threads to handle a flow
hurts performance in most cases. That's why we currently enforce the
pairing of rx and tx in tun. But it looks like aRFS can't guarantee
this. And if we end up enforcing the pairing through XPS/irq affinity
anyway, there's no need for aRFS.
>
>>>> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
>>>> list - it's still the same concern I had in the old email thread that
>>>> Jason mentioned.
>>>>
>>>> In order for virtio-net aRFS to make sense there needs to be an overall
>>>> plan for pushing flow mapping information down to the physical NIC.
>>>> That's the only way to actually achieve the benefit of steering:
>>>> processing the packet on the CPU where the application is running.
>>>>
>>> I don't think this is necessarily true. Per flow steering amongst
>>> virtual queues should be beneficial in itself. virtio-net can leverage
>>> RFS or aRFS where it's available.
>>>
>>>> If it's not possible or too hard to implement aRFS down the entire
>>>> stack, we won't be able to process the packet on the right CPU.
>>>> Then we might as well not bother with aRFS and just distribute uniformly
>>>> across the rx virtqueues.
>>>>
>>>> Please post an outline of how rx packets will be steered up the stack so
>>>> we can discuss whether aRFS can bring any benefit.
>>>>
>>> 1. The aRFS interface for the guest to specify which virtual queue to
>>> receive a packet on is fairly straight forward.
>>> 2. To hook into RFS, we need to match the virtual queue to the real
>>> CPU it will processed on, and then program the RFS table for that flow
>>> and CPU.
>>> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
>>> correct queue for the CPU.
>>>
>>>> Stefan
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-17  5:22         ` Stefan Hajnoczi
@ 2014-01-17  6:45           ` Jason Wang
  2014-01-20 14:36           ` Ben Hutchings
  1 sibling, 0 replies; 32+ messages in thread
From: Jason Wang @ 2014-01-17  6:45 UTC (permalink / raw)
  To: Stefan Hajnoczi, Tom Herbert
  Cc: Zhi Yong Wu, Linux Netdev List, Eric Dumazet, David S. Miller,
	Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On 01/17/2014 01:22 PM, Stefan Hajnoczi wrote:
> On Thu, Jan 16, 2014 at 09:12:29AM -0800, Tom Herbert wrote:
>> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>>>> CC: stefanha, MST, Rusty Russel
>>>>
>>>> ---------- Forwarded message ----------
>>>> From: Jason Wang <jasowang@redhat.com>
>>>> Date: Thu, Jan 16, 2014 at 12:23 PM
>>>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>>>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>>>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>>>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>>
>>>>
>>>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>>>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>>>>
>>>>> HI, folks
>>>>>
>>>>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>>>> aRFS will be used to select the RX queue. To make sure that it's going ahead
>>>>> in the correct direction, although it is still one RFC and isn't tested, it's
>>>>> post out ASAP. Any comment are appreciated, thanks.
>>>>>
>>>>> If anyone is interested in playing with it, you can get this patchset from my
>>>>> dev git on github:
>>>>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>>>>
>>>>> Zhi Yong Wu (3):
>>>>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>>>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>>>>    virtio-net: Add accelerated RFS support
>>>>>
>>>>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>>>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>>>>   include/linux/virtio_config.h |   12 +++++++
>>>>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>>>>
>>>> Please run get_maintainter.pl before sending the patch. You'd better
>>>> at least cc virtio maintainer/list for this.
>>>>
>>>> The core aRFS method is a noop in this RFC which make this series no
>>>> much sense to discuss. You should at least mention the big picture
>>>> here in the cover letter. I suggest you should post a RFC which can
>>>> run and has expected result or you can just raise a thread for the
>>>> design discussion.
>>>>
>>>> And this method has been discussed before, you can search "[net-next
>>>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>>>> for a very old prototype implemented by me. It can work and looks like
>>>> most of this RFC have already done there.
>>>>
>>>> A basic question is whether or not we need this, not all the mq cards
>>>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>>>> overheads? For virtio, we want to reduce the vmexits as much as
>>>> possible but this aRFS seems introduce a lot of more of this. Making a
>>>> complex interfaces just for an virtual device may not be good, simple
>>>> method may works for most of the cases.
>>>>
>>>> We really should consider to offload this to real nic. VMDq and L2
>>>> forwarding offload may help in this case.
>> Adding flow director support would be a good step, Zhi's patches for
>> support in tun have been merged, so support in virtio-net would be a
>> good follow on. But, flow-director does have some limitations and
>> performance issues of it's own (forced pairing between TX and RX
>> queues, lookup on every TX packet). In the case of virtualization,
>> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
>> emulations and so far seems to be wins in most cases. Extending these
>> down into the stack so that they can leverage HW mechanisms is a good
>> goal for best performance. It's probably generally true that most of
>> the offloads commonly available for NICs we'll want in virtualization
>> path. Of course, we need to deomonstrate that they provide real
>> performance benefit in this use case.
>>
>> I believe tying in aRFS (or flow director) into a real aRFS is just a
>> matter of programming the RFS table properly. This is not the complex
>> side of the interface, I believe this already works with the tun
>> patches.
>>
>>> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
>>> list - it's still the same concern I had in the old email thread that
>>> Jason mentioned.
>>>
>>> In order for virtio-net aRFS to make sense there needs to be an overall
>>> plan for pushing flow mapping information down to the physical NIC.
>>> That's the only way to actually achieve the benefit of steering:
>>> processing the packet on the CPU where the application is running.
>>>
>> I don't think this is necessarily true. Per flow steering amongst
>> virtual queues should be beneficial in itself. virtio-net can leverage
>> RFS or aRFS where it's available.
> I guess we need to see benchmark results :)
>
>>> If it's not possible or too hard to implement aRFS down the entire
>>> stack, we won't be able to process the packet on the right CPU.
>>> Then we might as well not bother with aRFS and just distribute uniformly
>>> across the rx virtqueues.
>>>
>>> Please post an outline of how rx packets will be steered up the stack so
>>> we can discuss whether aRFS can bring any benefit.
>>>
>> 1. The aRFS interface for the guest to specify which virtual queue to
>> receive a packet on is fairly straight forward.
>> 2. To hook into RFS, we need to match the virtual queue to the real
>> CPU it will processed on, and then program the RFS table for that flow
>> and CPU.
>> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
>> correct queue for the CPU.
> There are a lot of details that are not yet worked out:
>
> If you want to implement aRFS down the vhost_net + macvtap path
> (probably easiest?) how will Step 2 work?  Do the necessary kernel
> interfaces exist to take the flow information in vhost_net, give them to
> macvtap, and finally push them down to the physical NIC?
>
> Not sure if aRFS will work down the full stack with vhost_net + tap +
> bridge.  Any ideas?

It actually works: tun records the flow-to-cpu mapping through RFS when
it receives a packet from vhost_net, and the host NIC driver uses this
info to program the hardware flow director so that the interrupt is
sent directly to the cpu where vhost_net is running.
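To make that concrete, the path can be modeled very roughly like this
(a conceptual sketch with made-up names; the real logic lives in tun's
flow tracking and in the NIC driver's aRFS callback):

#include <stdint.h>

#define FLOW_TABLE_SIZE 1024

struct flow_record {
	uint32_t hash;		/* flow hash */
	int	 cpu;		/* CPU where vhost_net consumed the flow */
	uint8_t  valid;
};

static struct flow_record flow_table[FLOW_TABLE_SIZE];

/* tun side: called while sending a packet to the guest, i.e. in
 * vhost_net's context, so "current CPU" is where vhost_net runs.
 */
static void record_flow(uint32_t hash, int current_cpu)
{
	struct flow_record *r = &flow_table[hash & (FLOW_TABLE_SIZE - 1)];

	r->hash = hash;
	r->cpu = current_cpu;
	r->valid = 1;
}

/* NIC side: the aRFS callback consults the table and reprograms the
 * HW filter so later packets of this flow interrupt the recorded CPU.
 */
static int steer_flow(uint32_t hash)
{
	struct flow_record *r = &flow_table[hash & (FLOW_TABLE_SIZE - 1)];

	if (!r->valid || r->hash != hash)
		return -1;	/* unknown flow: leave steering alone */
	return r->cpu;
}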
>
> At the QEMU level it is currently pointless to implement virtio-net aRFS
> emulation since the QEMU global mutex is taken and virtio-net emulation
> is not multi-threaded.

I don't think qemu is the proper layer to do this. It should be the job
of tun/macvtap, which lets us cooperate more closely with the kernel.
>
> I think aRFS is a good thing, we just need to see performance results
> and know that this won't be a dead end after merging changes to
> virtio-net and the virtio specification.
>
> Stefan
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-17  6:36             ` Jason Wang
@ 2014-01-17 16:03               ` Tom Herbert
  0 siblings, 0 replies; 32+ messages in thread
From: Tom Herbert @ 2014-01-17 16:03 UTC (permalink / raw)
  To: Jason Wang
  Cc: Stefan Hajnoczi, Zhi Yong Wu, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On Thu, Jan 16, 2014 at 10:36 PM, Jason Wang <jasowang@redhat.com> wrote:
> On 01/17/2014 01:08 PM, Tom Herbert wrote:
>> On Thu, Jan 16, 2014 at 7:26 PM, Jason Wang <jasowang@redhat.com> wrote:
>>> On 01/17/2014 01:12 AM, Tom Herbert wrote:
>>>> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>>>> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>>>>>> CC: stefanha, MST, Rusty Russel
>>>>>>
>>>>>> ---------- Forwarded message ----------
>>>>>> From: Jason Wang <jasowang@redhat.com>
>>>>>> Date: Thu, Jan 16, 2014 at 12:23 PM
>>>>>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>>>>>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>>>>>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>>>>>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>>>>
>>>>>>
>>>>>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>>>>>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>>>>>>
>>>>>>> HI, folks
>>>>>>>
>>>>>>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>>>>>> aRFS will be used to select the RX queue. To make sure that it's going ahead
>>>>>>> in the correct direction, although it is still one RFC and isn't tested, it's
>>>>>>> post out ASAP. Any comment are appreciated, thanks.
>>>>>>>
>>>>>>> If anyone is interested in playing with it, you can get this patchset from my
>>>>>>> dev git on github:
>>>>>>>    git://github.com/wuzhy/kernel.git virtnet_rfs
>>>>>>>
>>>>>>> Zhi Yong Wu (3):
>>>>>>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>>>>>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>>>>>>    virtio-net: Add accelerated RFS support
>>>>>>>
>>>>>>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>>>>>>   drivers/virtio/virtio_pci.c   |   11 +++++++
>>>>>>>   include/linux/virtio_config.h |   12 +++++++
>>>>>>>   3 files changed, 89 insertions(+), 1 deletions(-)
>>>>>>>
>>>>>> Please run get_maintainter.pl before sending the patch. You'd better
>>>>>> at least cc virtio maintainer/list for this.
>>>>>>
>>>>>> The core aRFS method is a noop in this RFC which make this series no
>>>>>> much sense to discuss. You should at least mention the big picture
>>>>>> here in the cover letter. I suggest you should post a RFC which can
>>>>>> run and has expected result or you can just raise a thread for the
>>>>>> design discussion.
>>>>>>
>>>>>> And this method has been discussed before, you can search "[net-next
>>>>>> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
>>>>>> for a very old prototype implemented by me. It can work and looks like
>>>>>> most of this RFC have already done there.
>>>>>>
>>>>>> A basic question is whether or not we need this, not all the mq cards
>>>>>> use aRFS (see ixgbe ATR). And whether or not it can bring extra
>>>>>> overheads? For virtio, we want to reduce the vmexits as much as
>>>>>> possible but this aRFS seems introduce a lot of more of this. Making a
>>>>>> complex interfaces just for an virtual device may not be good, simple
>>>>>> method may works for most of the cases.
>>>>>>
>>>>>> We really should consider to offload this to real nic. VMDq and L2
>>>>>> forwarding offload may help in this case.
>>>> Adding flow director support would be a good step, Zhi's patches for
>>>> support in tun have been merged, so support in virtio-net would be a
>>>> good follow on. But, flow-director does have some limitations and
>>>> performance issues of it's own (forced pairing between TX and RX
>>>> queues, lookup on every TX packet).
>>> True. But the pairing was designed to work without guest involving since
>>> we really want to reduce the vmexits from guest. And lookup on every TX
>>> packets could be released to every N packets. But I agree exposing the
>>> API to guest may bring lots of flexibility.
>>>> In the case of virtualization,
>>>> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
>>>> emulations and so far seems to be wins in most cases. Extending these
>>>> down into the stack so that they can leverage HW mechanisms is a good
>>>> goal for best performance. It's probably generally true that most of
>>>> the offloads commonly available for NICs we'll want in virtualization
>>>> path. Of course, we need to deomonstrate that they provide real
>>>> performance benefit in this use case.
>>> Yes, we need a prototype to see how much it can help.
>>>> I believe tying in aRFS (or flow director) into a real aRFS is just a
>>>> matter of programming the RFS table properly. This is not the complex
>>>> side of the interface, I believe this already works with the tun
>>>> patches.
>>> Right, what we may needs is
>>>
>>> - exposing new tun ioctls for qemu adding or removing a flow
>>> - new virtqueue command for guest driver to adding or removing a flow
>>> (btw, current control virtqueue is really slow, we may need to improve it).
>>> - an agreement of host and guest to use the same hash method, or just
>>> compute software hash in host and pass it to guest (which needs extra
>>> API to do)
>> The model to get RX hash from a device is well known, the guest can
>> use that to reflect information about a flow back to the host, and for
>> performance we might piggyback RX queue selection on the TX
>> descriptors of a flow. Probably some limitations with real HW, but I
>> assume would have less issues in SW.
>
> It may work but may need extending the current virtio-net TX descriptor
> or extra API such as vnet header.
>>
>> IMO, if we have a flow state on the host we should *never* need to
>> perform any hash computation on TX (a host is not a switch :-) ), we
>> may want to have some mirrored flow state in the kernel for these
>> flows which are indexed by the hash provided in TX.
>
> The problem is host may have several different type cards, so the it was
> not guaranteed that they can provide the same rxhash.

Right, that property has to be taken into account with all the uses of
rxhash; we can never compute a hash on the host and expect it to match
a HW rxhash (this was a problem with the original tun flow mapping
code). What is done instead is to cache the rxhash in the flow state
(e.g. TCP). This is the value used to program the RFS table, and it can
be sent back to the device to match the flow. With a guest, the HW hash
can be propagated all the way to the guest flow state; aRFS is one way
to reflect it back to the kernel and potentially all the way down to
the device.
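For reference, the "cache the device hash, don't recompute" idea can be
sketched like this in plain C (in the host kernel it corresponds
roughly to sk->sk_rxhash plus sock_rps_record_flow(); the names below
are invented for illustration):

#include <stdint.h>

struct flow_state {
	uint32_t cached_rxhash;	/* hash as reported by the device */
	int	 has_hash;
};

#define RFS_TABLE_SIZE 4096
static int rfs_table[RFS_TABLE_SIZE];	/* hash -> desired CPU */

/* RX path: remember whatever hash the device reported for this flow. */
static void flow_note_rxhash(struct flow_state *f, uint32_t hw_rxhash)
{
	if (!f->has_hash) {
		f->cached_rxhash = hw_rxhash;
		f->has_hash = 1;
	}
}

/* Whenever the flow is consumed, record where.  The same cached value
 * is what a guest would echo back via aRFS.
 */
static void flow_record_cpu(const struct flow_state *f, int cpu)
{
	if (f->has_hash)
		rfs_table[f->cached_rxhash & (RFS_TABLE_SIZE - 1)] = cpu;
}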

>>
>>> - change guest driver to use aRFS
>>>
>>> Some of the above has been implemented in my old RFC.
>> Looks pretty similar to Zhi's tun work. Are you planning to refresh
>> those patches?
>
> I have the plan. But there's another concern:
>
> During my testing ( and also tested by some IBM engineers in the past),
> we find it's better for a single vhost thread to handle both rx and tx
> for a single flow. Using two different vhost threads to handle a flow
> may damage the performance in most of the cases. That's why we enforce
> the pairing of rx and tx in tun currently. But looks like aRFS can't
> guarantee this. If we want to enforce this paring through XPS/irq
> affinity, there's no need for aRFS.
>>
>>>>> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
>>>>> list - it's still the same concern I had in the old email thread that
>>>>> Jason mentioned.
>>>>>
>>>>> In order for virtio-net aRFS to make sense there needs to be an overall
>>>>> plan for pushing flow mapping information down to the physical NIC.
>>>>> That's the only way to actually achieve the benefit of steering:
>>>>> processing the packet on the CPU where the application is running.
>>>>>
>>>> I don't think this is necessarily true. Per flow steering amongst
>>>> virtual queues should be beneficial in itself. virtio-net can leverage
>>>> RFS or aRFS where it's available.
>>>>
>>>>> If it's not possible or too hard to implement aRFS down the entire
>>>>> stack, we won't be able to process the packet on the right CPU.
>>>>> Then we might as well not bother with aRFS and just distribute uniformly
>>>>> across the rx virtqueues.
>>>>>
>>>>> Please post an outline of how rx packets will be steered up the stack so
>>>>> we can discuss whether aRFS can bring any benefit.
>>>>>
>>>> 1. The aRFS interface for the guest to specify which virtual queue to
>>>> receive a packet on is fairly straight forward.
>>>> 2. To hook into RFS, we need to match the virtual queue to the real
>>>> CPU it will processed on, and then program the RFS table for that flow
>>>> and CPU.
>>>> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
>>>> correct queue for the CPU.
>>>>
>>>>> Stefan
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-16 23:16       ` Ben Hutchings
@ 2014-01-17 16:54         ` Zhi Yong Wu
  2014-01-17 17:20           ` Ben Hutchings
  0 siblings, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-17 16:54 UTC (permalink / raw)
  To: Ben Hutchings, Stefan Hajnoczi
  Cc: Linux Netdev List, Tom Herbert, Eric Dumazet, David S. Miller,
	Zhi Yong Wu

On Fri, Jan 17, 2014 at 7:16 AM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Fri, 2014-01-17 at 06:00 +0800, Zhi Yong Wu wrote:
>> On Fri, Jan 17, 2014 at 5:31 AM, Ben Hutchings
>> <bhutchings@solarflare.com> wrote:
>> > On Wed, 2014-01-15 at 22:20 +0800, Zhi Yong Wu wrote:
>> > [...]
>> >> +static int virtnet_init_rx_cpu_rmap(struct virtnet_info *vi)
>> >> +{
>> >> +     int rc = 0;
>> >> +
>> >> +#ifdef CONFIG_RFS_ACCEL
>> >> +     struct virtio_device *vdev = vi->vdev;
>> >> +     unsigned int irq;
>> >> +     int i;
>> >> +
>> >> +     if (!vi->affinity_hint_set)
>> >> +             goto out;
>> >> +
>> >> +     vi->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(vi->max_queue_pairs);
>> >> +     if (!vi->dev->rx_cpu_rmap) {
>> >> +             rc = -ENOMEM;
>> >> +             goto out;
>> >> +     }
>> >> +
>> >> +     for (i = 0; i < vi->max_queue_pairs; i++) {
>> >> +             irq = virtqueue_get_vq_irq(vdev, vi->rq[i].vq);
>> >> +             if (irq == -1)
>> >> +                     goto failed;
>> >
>> > Jumping into an if-statement is confusing.  Also do you really want to
>> > return 0 in this case?
>> No, If it fail to get irq, i want it to exit as soon as possible,
>> otherwise it will cause irq_cpu_rmap_add() to be invoked with one
>> incorrect argument irq.
>
> Well currently this goto does result in returning 0, as rc has not been
> changed after its initialisation to 0.
Yes, this goto will result in returning 0. I'm not sure whether that is
appropriate, but the current code definitely has a bug here. My
thinking was that when the NIC doesn't have MSI-X enabled and
CONFIG_RFS_ACCEL is defined, the aRFS cpu_rmap table would simply not
be allocated here and aRFS would be ignored automatically, but that
would make set_rps_cpu() run into a memory problem.
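One way the error path of the quoted virtnet_init_rx_cpu_rmap() could
be restructured is sketched below. This is untested and assumes the
original loop continues with irq_cpu_rmap_add(); using a signed irq
also avoids comparing an unsigned value against -1:

	for (i = 0; i < vi->max_queue_pairs; i++) {
		int irq = virtqueue_get_vq_irq(vdev, vi->rq[i].vq);

		if (irq < 0) {
			/* No MSI-X vector for this vq: don't silently
			 * return 0 with a half-initialised rmap.
			 */
			rc = -EINVAL;
			goto failed;
		}

		rc = irq_cpu_rmap_add(vi->dev->rx_cpu_rmap, irq);
		if (rc)
			goto failed;
	}
	goto out;

failed:
	/* Tear the map down so set_rps_cpu() never sees a stale or
	 * partially filled rx_cpu_rmap.
	 */
	free_irq_cpu_rmap(vi->dev->rx_cpu_rmap);
	vi->dev->rx_cpu_rmap = NULL;
out:
	return rc;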

>
>> By the way, do you have thought about if it makes sense to add aRFS
>> support to virtio_net? For [patch 2/3], what do you think of those
>> missing stuff listed by me?
>> For how indirect table is implemented in sfc NIC, do you have any doc
>> to share with  me? thanks.
>
> Going through that list:
>
>> 1.)  guest virtio_net driver should have one filter table and its
>> entries can be expired periodically;
>
> In sfc, we keep a count how many entries have been inserted in each NAPI
> context.  Whenever the NAPI poll function is about to call
> napi_complete() and the count for that context has reached a trigger
> level, it will scan some quota of filter entries for expiry.
Thanks for sharing.
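For reference, the quota-based expiry described above might look
roughly like this in a driver (an illustrative sketch; the ctx and
filter structures and helpers are invented, only rps_may_expire_flow()
is an existing kernel interface):

/* Called from the NAPI poll function just before napi_complete(). */
static void arfs_maybe_expire(struct my_napi_ctx *ctx)
{
	unsigned int quota = 32;	/* entries to scan per pass */
	unsigned int idx;

	if (ctx->arfs_filter_count < ARFS_EXPIRE_TRIGGER)
		return;			/* not enough filters inserted yet */

	for (idx = ctx->arfs_scan_pos; quota--;
	     idx = (idx + 1) % ARFS_TABLE_SIZE) {
		struct my_arfs_filter *f = &ctx->filters[idx];

		if (!f->in_use)
			continue;
		/* rps_may_expire_flow() tells us whether the stack still
		 * expects this flow on this queue.
		 */
		if (rps_may_expire_flow(ctx->netdev, f->rxq_index,
					f->flow_id, f->filter_id)) {
			my_hw_remove_filter(ctx, f);
			f->in_use = false;
			ctx->arfs_filter_count--;
		}
	}
	ctx->arfs_scan_pos = idx;
}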
>
>> 2.)  guest virtio_net driver should pass rx queue index and filter
>> info down to the emulated virtio_net NIC in QEMU.
>> 3.) the emulated virtio_net NIC should have its indirect table to
>> store the flow to rx queue mapping.
>> 4.) the emulated virtio_net NIC should classify the rx packet to
>> selected queue by applying the filter.
>
> I think the most efficient way to do this would be to put a hash table
> in some shared memory that both guest and host can read and write.  The
> virtio control path would only be used to set up and tear down the
> table.  I don't know whether virtio allows for that.
Yes, I agree with you, but I don't know what Stefan thinks of it. CC
stefanha, he is the virtio expert. Stefan, can you give some advice on
this?

>
> However, to take advantage of ARFS on a physical net driver, it would be
> necessary to send a control request for part 2.
aRFS on a physical net driver? Which physical net driver do you mean? I
thought that in order to enable aRFS, the guest virtio_net driver
should send a control request to its emulated virtio_net NIC.
>
>> 5.) update virtio spec.
>> Do i miss anything? If yes, please correct me.
>> For 3.) and 4.), do you have any doc about how they are implemented in
>> physical NICs? e.g. mlx4_en or sfc, etc.
>
> The Programmer's Reference Manuals for Solarflare controllers are only
> available under NDA.  I can describe the hardware filtering briefly, but
> actually I don't think it's very relevant to virtio_net.
>
> There is a typical RSS hash indirection table (128 entries), but for
> ARFS we use a different RX filter table which has 8K entries
> (RX_FILTER_TBL0 on SFC4000/SFC9000 family).
>
> Solarflare controllers support user-level networking, which requires
> perfect filtering to deliver each application's flows into that
> application's dedicated RX queue(s).  Lookups in this larger filter
> table are still hash-based, but each entry specifies a TCP/IP or UDP/IP
> 4-tuple or local 2-tuple to match.  ARFS uses the 4-tuple type only.
>
> To allow for hash collisions, a secondary hash function generates an
> increment to be added to the initial table index repeatedly for hash
> chaining.  There is a control register which tells the controller the
> maximum hash chain length to search for each IP filter type; after this
> it will fall back to checking MAC filters and then default filters.
>
> On the SFC9100 family, filter updates and lookups are implemented by
> firmware and the driver doesn't manage the filter table itself, but I
> know it is still a hash table of perfect filters.
>
> For ARFS, perfect filtering is not needed.  I think it would be
> preferable to use a fairly big hash table and make insertion fail in
> case of a collision.  Since the backend for virtio_net will do RX queue
> selection in software, the entire table of queue indices should fit into
> its L1 cache.
That is very detailed, thanks a lot for sharing; it is very helpful to
me. I will come back and read it carefully once we reach agreement on
whether aRFS should be integrated into virtio_net and what the big
picture is.
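To make the suggestion above concrete, a minimal model of such a
collision-rejecting table of queue indices (sizes and names are
illustrative only) could be:

#include <stdint.h>

#define QSEL_TABLE_SIZE 8192		/* power of two */
#define QSEL_EMPTY	0xffffffffu	/* sentinel; a real table would
					 * need a separate valid flag */

struct qsel_entry {
	uint32_t flow_hash;		/* full hash, to detect collisions */
	uint16_t rx_queue;
};

static struct qsel_entry qsel[QSEL_TABLE_SIZE] = {
	[0 ... QSEL_TABLE_SIZE - 1] = { .flow_hash = QSEL_EMPTY },
};

static int qsel_insert(uint32_t hash, uint16_t queue)
{
	struct qsel_entry *e = &qsel[hash & (QSEL_TABLE_SIZE - 1)];

	if (e->flow_hash != QSEL_EMPTY && e->flow_hash != hash)
		return -1;		/* collision: refuse, keep old entry */
	e->flow_hash = hash;
	e->rx_queue = queue;
	return 0;
}

static int qsel_lookup(uint32_t hash, uint16_t *queue)
{
	struct qsel_entry *e = &qsel[hash & (QSEL_TABLE_SIZE - 1)];

	if (e->flow_hash != hash)
		return -1;		/* miss: fall back to RSS-style spreading */
	*queue = e->rx_queue;
	return 0;
}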

>
> Ben.
>
> --
> Ben Hutchings, Staff Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-17 16:54         ` Zhi Yong Wu
@ 2014-01-17 17:20           ` Ben Hutchings
  2014-01-18  4:59             ` Tom Herbert
  0 siblings, 1 reply; 32+ messages in thread
From: Ben Hutchings @ 2014-01-17 17:20 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Stefan Hajnoczi, Linux Netdev List, Tom Herbert, Eric Dumazet,
	David S. Miller, Zhi Yong Wu

On Sat, 2014-01-18 at 00:54 +0800, Zhi Yong Wu wrote:
> On Fri, Jan 17, 2014 at 7:16 AM, Ben Hutchings
> <bhutchings@solarflare.com> wrote:
[...]
> > However, to take advantage of ARFS on a physical net driver, it would be
> > necessary to send a control request for part 2.
> aRFS on a physical net driver? What is this physical net driver? I
> thought that in order to enable aRFS, guest virtio_net driver should
> send a control request to its emulated virtio_net NIC.
[...]

If the backend is connected to a macvlan device on top of a physical net
device that supports ARFS, then there is further potential for improving
performance by steering to the best physical RX queue and CPU as well as
the best virtio_net RX queue and vCPU.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-17 17:20           ` Ben Hutchings
@ 2014-01-18  4:59             ` Tom Herbert
  2014-01-18 14:19               ` Ben Hutchings
  0 siblings, 1 reply; 32+ messages in thread
From: Tom Herbert @ 2014-01-18  4:59 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Zhi Yong Wu, Stefan Hajnoczi, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu

Ben,

I've never quite understood why flow management in aRFS has to be done
with separate messages; if I recall correctly, this erodes the
performance gains to a large extent. It seems like we should be able to
piggyback information about the RX side of a connection, namely the
rxhash and queue mapping, on a TX descriptor for that connection. State
creation would be implicit on seeing a new rxhash value; teardown might
be accomplished with a separate flag on the final TX packet of the
connection (this would need some additional logic in the stack). Is
this method not feasible in either NICs or virtio-net?
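As a purely hypothetical illustration (this is not part of the virtio
spec or any existing header), the piggybacked information could be as
small as:

#include <stdint.h>

struct vnet_hdr_flow_ext {	/* imagined extension to the TX vnet header */
	uint32_t rxhash;	/* hash the guest saw on RX for this flow */
	uint16_t rx_queue;	/* guest RX queue it wants the flow on */
	uint16_t flags;
#define FLOW_EXT_VALID		0x0001
#define FLOW_EXT_TEARDOWN	0x0002	/* final packet: drop the state */
};

/* Host-side stubs standing in for whatever flow table the backend keeps. */
static void flow_table_set(uint32_t hash, uint16_t queue) { (void)hash; (void)queue; }
static void flow_table_remove(uint32_t hash) { (void)hash; }

/* State is created implicitly on the first unseen rxhash and torn down
 * when the teardown flag is seen on the last TX packet of the flow.
 */
static void host_handle_tx_flow_ext(const struct vnet_hdr_flow_ext *ext)
{
	if (!(ext->flags & FLOW_EXT_VALID))
		return;
	if (ext->flags & FLOW_EXT_TEARDOWN)
		flow_table_remove(ext->rxhash);
	else
		flow_table_set(ext->rxhash, ext->rx_queue);
}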

On Fri, Jan 17, 2014 at 9:20 AM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Sat, 2014-01-18 at 00:54 +0800, Zhi Yong Wu wrote:
>> On Fri, Jan 17, 2014 at 7:16 AM, Ben Hutchings
>> <bhutchings@solarflare.com> wrote:
> [...]
>> > However, to take advantage of ARFS on a physical net driver, it would be
>> > necessary to send a control request for part 2.
>> aRFS on a physical net driver? What is this physical net driver? I
>> thought that in order to enable aRFS, guest virtio_net driver should
>> send a control request to its emulated virtio_net NIC.
> [...]
>
> If the backend is connected to a macvlan device on top of a physical net
> device that supports ARFS, then there is further potential for improving
> performance by steering to the best physical RX queue and CPU as well as
> the best virtio_net RX queue and vCPU.
>
> Ben.
>
> --
> Ben Hutchings, Staff Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support
  2014-01-18  4:59             ` Tom Herbert
@ 2014-01-18 14:19               ` Ben Hutchings
  0 siblings, 0 replies; 32+ messages in thread
From: Ben Hutchings @ 2014-01-18 14:19 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Zhi Yong Wu, Stefan Hajnoczi, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu

On Fri, 2014-01-17 at 20:59 -0800, Tom Herbert wrote:
> Ben,
> 
> I've never quite understood why flow management in aRFS has to be done
> with separate messages, and if I recall this seems to mitigate
> performance gains to a large extent. It seems like we should be able
> to piggyback on a TX descriptor for a connection information about the
> RX side for that connection, namely the rxhash and queue mapping.
> State creation should be implicit by just seeing a new rxhash value,
> tear down might be accomplished with a separate flag on the final TX
> packet on the connection (this would need some additional logic in the
> stack). Is this method not feasible in either NICs or virtio-net?

Well that's roughly how Flow Director works, isn't it?  So it is
feasible on at least one NIC!  It might be possible to implement
something like that in firmware on the SFC9100 (with the filter based on
the following packet headers, not a hash), but I don't know.  As for
other vendors - I have no idea.

Inserting filters from the receive path seemed like a natural extension
of the software RFS implementation.  And it means that the hardware
filters are inserted a little earlier (no need to transmit another
packet), but maybe that doesn't matter much.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-17  5:22         ` Stefan Hajnoczi
  2014-01-17  6:45           ` Jason Wang
@ 2014-01-20 14:36           ` Ben Hutchings
  1 sibling, 0 replies; 32+ messages in thread
From: Ben Hutchings @ 2014-01-20 14:36 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Tom Herbert, Zhi Yong Wu, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell,
	Jason Wang

On Fri, 2014-01-17 at 13:22 +0800, Stefan Hajnoczi wrote:
> On Thu, Jan 16, 2014 at 09:12:29AM -0800, Tom Herbert wrote:
> > On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
[...]
> > > If it's not possible or too hard to implement aRFS down the entire
> > > stack, we won't be able to process the packet on the right CPU.
> > > Then we might as well not bother with aRFS and just distribute uniformly
> > > across the rx virtqueues.
> > >
> > > Please post an outline of how rx packets will be steered up the stack so
> > > we can discuss whether aRFS can bring any benefit.
> > >
> > 1. The aRFS interface for the guest to specify which virtual queue to
> > receive a packet on is fairly straight forward.
> > 2. To hook into RFS, we need to match the virtual queue to the real
> > CPU it will processed on, and then program the RFS table for that flow
> > and CPU.
> > 3. NIC aRFS keys off the RFS tables so it can program the HW with the
> > correct queue for the CPU.
> 
> There are a lot of details that are not yet worked out:
> 
> If you want to implement aRFS down the vhost_net + macvtap path
> (probably easiest?) how will Step 2 work?  Do the necessary kernel
> interfaces exist to take the flow information in vhost_net, give them to
> macvtap, and finally push them down to the physical NIC?
>
> Not sure if aRFS will work down the full stack with vhost_net + tap +
> bridge.  Any ideas?
[...]

Currently ARFS identifies the flow to be steered by passing an skb from
that flow to the driver, not just a hash.  This is important for the sfc
driver because we're using perfect filters for ARFS and we need to pick
out the relevant header fields instead of the RSS hash.

If you try to set an ARFS filter synchronously from the guest, this
would require a different driver operation, and if the guest only
provides a hash then sfc probably would not be able to support it.

An alternative would be that the hash is used to update the
rps_flow_table for the physical RX queue and then ARFS on the host can
insert a filter after the *next* packet.
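In other words, the guest-provided hash would only prime the software
table, and the hardware filter insertion would stay keyed off a real
skb. A rough sketch of that two-step sequence (invented helper names,
not existing interfaces):

#include <stdint.h>

struct pkt { uint32_t hash; /* plus the real headers in practice */ };

#define SW_RPS_SIZE 4096
static int sw_rps[SW_RPS_SIZE];		/* 0 = unset, otherwise cpu + 1 */

/* Step 1: the guest's hash only updates the software steering table. */
static void guest_hash_hint(uint32_t guest_rxhash, int target_cpu)
{
	sw_rps[guest_rxhash & (SW_RPS_SIZE - 1)] = target_cpu + 1;
}

/* Step 2: when the *next* packet of the flow arrives as a real skb,
 * ordinary ARFS has the full headers and can insert a perfect filter.
 */
static void on_rx_packet(const struct pkt *p)
{
	int entry = sw_rps[p->hash & (SW_RPS_SIZE - 1)];

	if (entry) {
		int cpu = entry - 1;
		/* a driver would insert its HW filter toward 'cpu' here,
		 * extracting the match fields from the packet headers */
		(void)cpu;
	}
}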

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-17  3:04       ` Jason Wang
@ 2014-01-20 16:49         ` Stefan Hajnoczi
  0 siblings, 0 replies; 32+ messages in thread
From: Stefan Hajnoczi @ 2014-01-20 16:49 UTC (permalink / raw)
  To: Jason Wang
  Cc: Zhi Yong Wu, Linux Netdev List, Tom Herbert, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell

On Fri, Jan 17, 2014 at 11:04:00AM +0800, Jason Wang wrote:
> On 01/16/2014 04:52 PM, Stefan Hajnoczi wrote:
> > On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
> >> CC: stefanha, MST, Rusty Russel
> >>
> >> ---------- Forwarded message ----------
> >> From: Jason Wang <jasowang@redhat.com>
> >> Date: Thu, Jan 16, 2014 at 12:23 PM
> >> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
> >> To: Zhi Yong Wu <zwu.kernel@gmail.com>
> >> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
> >> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> >>
> >>
> >> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
> >>> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
> >>>
> >>> HI, folks
> >>>
> >>> The patchset is trying to integrate aRFS support to virtio_net. In this case,
> >>> aRFS will be used to select the RX queue. To make sure that it's going ahead
> >>> in the correct direction, although it is still one RFC and isn't tested, it's
> >>> post out ASAP. Any comment are appreciated, thanks.
> >>>
> >>> If anyone is interested in playing with it, you can get this patchset from my
> >>> dev git on github:
> >>>    git://github.com/wuzhy/kernel.git virtnet_rfs
> >>>
> >>> Zhi Yong Wu (3):
> >>>    virtio_pci: Introduce one new config api vp_get_vq_irq()
> >>>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
> >>>    virtio-net: Add accelerated RFS support
> >>>
> >>>   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
> >>>   drivers/virtio/virtio_pci.c   |   11 +++++++
> >>>   include/linux/virtio_config.h |   12 +++++++
> >>>   3 files changed, 89 insertions(+), 1 deletions(-)
> >>>
> >> Please run get_maintainter.pl before sending the patch. You'd better
> >> at least cc virtio maintainer/list for this.
> >>
> >> The core aRFS method is a noop in this RFC which make this series no
> >> much sense to discuss. You should at least mention the big picture
> >> here in the cover letter. I suggest you should post a RFC which can
> >> run and has expected result or you can just raise a thread for the
> >> design discussion.
> >>
> >> And this method has been discussed before, you can search "[net-next
> >> RFC PATCH 5/5] virtio-net: flow director support" in netdev archive
> >> for a very old prototype implemented by me. It can work and looks like
> >> most of this RFC have already done there.
> >>
> >> A basic question is whether or not we need this, not all the mq cards
> >> use aRFS (see ixgbe ATR). And whether or not it can bring extra
> >> overheads? For virtio, we want to reduce the vmexits as much as
> >> possible but this aRFS seems introduce a lot of more of this. Making a
> >> complex interfaces just for an virtual device may not be good, simple
> >> method may works for most of the cases.
> >>
> >> We really should consider to offload this to real nic. VMDq and L2
> >> forwarding offload may help in this case.
> > Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
> > list - it's still the same concern I had in the old email thread that
> > Jason mentioned.
> >
> > In order for virtio-net aRFS to make sense there needs to be an overall
> > plan for pushing flow mapping information down to the physical NIC.
> > That's the only way to actually achieve the benefit of steering:
> > processing the packet on the CPU where the application is running.
> 
> There are several places where the packet needs to be processed:
> 1) If you mean delivering the virtio-net interrupt to the vcpu where the
> application is running, current virtio-net already has that ability,
> thanks to the flow caches in tun and the per-vcpu queue configuration of
> XPS and irq affinity hints, which the driver sets up automatically by
> itself.
> 2) If you mean delivering the interrupt, or running the softirq, of the
> host card on the cpu where the vhost thread is running, the recent RFS
> support in tun can do this.
> 3) As for the affinity between the vhost thread and the vcpu thread,
> that seems beyond the scope of aRFS.

I guess you're right, the vhost thread affinity is a factor if we're
trying to take the interrupt on the host CPU that will eventually
execute the vcpu.  It's beyond the scope of this effort but something
that would be nice to eventually solve.

Stefan
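
As a concrete illustration of point (1) in the quoted text above, here is a
rough, untested sketch of the per-queue pinning a multiqueue virtio-net
driver can do: an irq affinity hint plus an XPS mask for each queue pair.
It is not code from the posted series; the function name is invented, it
assumes one queue pair per online CPU, and it uses the get_vq_irq() config
op that patch 1/3 proposes to add.

static void virtnet_pin_queue_pairs(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		const struct cpumask *mask = cpumask_of(i);
		int irq = -1;

		/* Ask the transport which irq serves RX queue i (patch 1/3). */
		if (vi->vdev->config->get_vq_irq)
			irq = vi->vdev->config->get_vq_irq(vi->vdev,
							   vi->rq[i].vq);

		/* Run RX and TX completion work for pair i on CPU i. */
		virtqueue_set_affinity(vi->rq[i].vq, i);
		virtqueue_set_affinity(vi->sq[i].vq, i);

		/* Have the stack pick TX queue i when transmitting from CPU i. */
		netif_set_xps_queue(vi->dev, mask, i);

		/* Hint the irq balancer to deliver RX interrupts to CPU i. */
		if (irq >= 0)
			irq_set_affinity_hint(irq, mask);
	}
}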

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16 17:12       ` Tom Herbert
  2014-01-17  3:26         ` Jason Wang
  2014-01-17  5:22         ` Stefan Hajnoczi
@ 2014-01-22 13:27         ` Zhi Yong Wu
  2014-01-22 18:00           ` Tom Herbert
  2 siblings, 1 reply; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-22 13:27 UTC (permalink / raw)
  To: Tom Herbert, Ben Hutchings
  Cc: Stefan Hajnoczi, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell,
	Jason Wang

On Fri, Jan 17, 2014 at 1:12 AM, Tom Herbert <therbert@google.com> wrote:
> On Thu, Jan 16, 2014 at 12:52 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> On Thu, Jan 16, 2014 at 04:34:10PM +0800, Zhi Yong Wu wrote:
>>> CC: stefanha, MST, Rusty Russell
>>>
>>> ---------- Forwarded message ----------
>>> From: Jason Wang <jasowang@redhat.com>
>>> Date: Thu, Jan 16, 2014 at 12:23 PM
>>> Subject: Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
>>> To: Zhi Yong Wu <zwu.kernel@gmail.com>
>>> Cc: netdev@vger.kernel.org, therbert@google.com, edumazet@google.com,
>>> davem@davemloft.net, Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>
>>>
>>> On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
>>> >
>>> > From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
>>> >
>>> > HI, folks
>>> >
>>> > The patchset is trying to integrate aRFS support to virtio_net. In this case,
>>> > aRFS will be used to select the RX queue. To make sure that it's going ahead
>>> > in the correct direction, although it is still one RFC and isn't tested, it's
>>> > post out ASAP. Any comment are appreciated, thanks.
>>> >
>>> > If anyone is interested in playing with it, you can get this patchset from my
>>> > dev git on github:
>>> >    git://github.com/wuzhy/kernel.git virtnet_rfs
>>> >
>>> > Zhi Yong Wu (3):
>>> >    virtio_pci: Introduce one new config api vp_get_vq_irq()
>>> >    virtio_net: Introduce one dummy function virtnet_filter_rfs()
>>> >    virtio-net: Add accelerated RFS support
>>> >
>>> >   drivers/net/virtio_net.c      |   67 ++++++++++++++++++++++++++++++++++++++++-
>>> >   drivers/virtio/virtio_pci.c   |   11 +++++++
>>> >   include/linux/virtio_config.h |   12 +++++++
>>> >   3 files changed, 89 insertions(+), 1 deletions(-)
>>> >
>>>
>>> Please run get_maintainer.pl before sending the patch. You should at
>>> least cc the virtio maintainer/list for this.
>>>
>>> The core aRFS method is a no-op in this RFC, which makes this series
>>> hard to discuss meaningfully. You should at least describe the big
>>> picture in the cover letter. I suggest posting an RFC that actually
>>> runs and shows the expected result, or simply starting a thread for
>>> the design discussion.
>>>
>>> This method has also been discussed before; search for "[net-next
>>> RFC PATCH 5/5] virtio-net: flow director support" in the netdev
>>> archive for an old prototype I implemented. It works, and it looks
>>> like most of what this RFC does was already done there.
>>>
>>> A basic question is whether we need this at all; not all multiqueue
>>> cards use aRFS (see ixgbe ATR). And does it bring extra overhead? For
>>> virtio we want to reduce vmexits as much as possible, but this aRFS
>>> seems to introduce many more of them. Adding a complex interface just
>>> for a virtual device may not be good; a simple method may work for
>>> most cases.
>>>
>>> We really should consider offloading this to the real NIC. VMDq and
>>> L2 forwarding offload may help in this case.
>>
> Adding flow director support would be a good step. Zhi's patches for
> support in tun have been merged, so support in virtio-net would be a
> good follow-on. But flow director does have some limitations and
> performance issues of its own (forced pairing between TX and RX
> queues, a lookup on every TX packet). In the case of virtualization,
> aRFS, RSS, ntuple filtering, LRO, etc. can be implemented as software
> emulations, and so far these seem to be wins in most cases. Extending
> them down into the stack so that they can leverage HW mechanisms is a
> good goal for best performance. It's probably generally true that
> we'll want most of the offloads commonly available on NICs in the
> virtualization path. Of course, we need to demonstrate that they
> provide a real performance benefit in this use case.
>
> I believe tying this aRFS (or flow director) into the real NIC's aRFS
> is just a matter of programming the RFS table properly. This is not
> the complex side of the interface; I believe it already works with the
> tun patches.
>
>> Zhi Yong and I had an IRC chat.  I wanted to post my questions on the
>> list - it's still the same concern I had in the old email thread that
>> Jason mentioned.
>>
>> In order for virtio-net aRFS to make sense there needs to be an overall
>> plan for pushing flow mapping information down to the physical NIC.
>> That's the only way to actually achieve the benefit of steering:
>> processing the packet on the CPU where the application is running.
>>
> I don't think this is necessarily true. Per-flow steering among
> virtual queues should be beneficial in itself. virtio-net can leverage
> RFS or aRFS where it's available.
>
>> If it's not possible or too hard to implement aRFS down the entire
>> stack, we won't be able to process the packet on the right CPU.
>> Then we might as well not bother with aRFS and just distribute uniformly
>> across the rx virtqueues.
>>
>> Please post an outline of how rx packets will be steered up the stack so
>> we can discuss whether aRFS can bring any benefit.
>>
> 1. The aRFS interface for the guest to specify which virtual queue to
> receive a packet on is fairly straightforward.
> 2. To hook into RFS, we need to match the virtual queue to the real
> CPU it will be processed on, and then program the RFS table for that
> flow and CPU.
> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
> correct queue for the CPU.
Does anyone have time to draw a conclusion from this discussion? In
particular, how, in more detail, will an rx packet be steered up the
stack across the guest virtio_net driver, the virtio_net NIC, vhost_net,
the tun driver, the host network stack, and the physical NIC?
What is the role of each unit along that path? Otherwise this discussion
won't reach any meaningful result, which is not what we expect.

>
>> Stefan



-- 
Regards,

Zhi Yong Wu
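
As background for the three numbered steps quoted above: inside the guest,
the usual glue between per-queue IRQs and aRFS is a cpu_rmap attached to
the net_device, which is presumably what the proposed get_vq_irq() config
op is meant to feed. The sketch below is illustrative only; the function
name is invented, it needs <linux/cpu_rmap.h>, and it is not taken from
the posted series.

#ifdef CONFIG_RFS_ACCEL
static int virtnet_alloc_rx_cpu_rmap(struct virtnet_info *vi)
{
	struct cpu_rmap *rmap;
	int i, irq, err;

	if (!vi->vdev->config->get_vq_irq)
		return 0;	/* transport without per-vq irqs (no MSI-X) */

	/* One reverse-map entry per RX virtqueue. */
	rmap = alloc_irq_cpu_rmap(vi->max_queue_pairs);
	if (!rmap)
		return -ENOMEM;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		irq = vi->vdev->config->get_vq_irq(vi->vdev, vi->rq[i].vq);
		if (irq < 0)
			continue;
		/* Track this irq's affinity so RFS can map a CPU to an RX queue. */
		err = irq_cpu_rmap_add(rmap, irq);
		if (err) {
			free_irq_cpu_rmap(rmap);
			return err;
		}
	}

	vi->dev->rx_cpu_rmap = rmap;
	return 0;
}
#endif /* CONFIG_RFS_ACCEL */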

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-22 13:27         ` Zhi Yong Wu
@ 2014-01-22 18:00           ` Tom Herbert
  2014-01-23  0:40             ` Zhi Yong Wu
  2014-01-23 14:23             ` Michael S. Tsirkin
  0 siblings, 2 replies; 32+ messages in thread
From: Tom Herbert @ 2014-01-22 18:00 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Ben Hutchings, Stefan Hajnoczi, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell,
	Jason Wang

>> 1. The aRFS interface for the guest to specify which virtual queue to
>> receive a packet on is fairly straightforward.
>> 2. To hook into RFS, we need to match the virtual queue to the real
>> CPU it will be processed on, and then program the RFS table for that
>> flow and CPU.
>> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
>> correct queue for the CPU.
> Does anyone have time to draw a conclusion from this discussion? In
> particular, how, in more detail, will an rx packet be steered up the
> stack across the guest virtio_net driver, the virtio_net NIC, vhost_net,
> the tun driver, the host network stack, and the physical NIC?
> What is the role of each unit along that path? Otherwise this discussion
> won't reach any meaningful result, which is not what we expect.
>
Working code outweighs theoretical discussion :-). I think you started
on a good track with the original patches, and I believe the tun path
should work pretty well (some performance numbers would still be good
to validate). It seems there are enough hooks in the virtio_net path
to implement something meaningful and maybe get some numbers (maybe
ignore NIC aRFS in the first pass).

Tom
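
For one possible reading of "something meaningful" on the guest side, here
is a rough, untested sketch of how the virtnet_filter_rfs() stub from
patch 2/3 might be filled in as an ndo_rx_flow_steer handler. The flow
table (rfs_queue[] and VIRTNET_RFS_TABLE_SIZE) is an invented placeholder,
and a real implementation would still need some channel, such as a virtio
control command or a tun/vhost hook, to make the steering decision visible
to the backend.

#ifdef CONFIG_RFS_ACCEL
static int virtnet_filter_rfs(struct net_device *dev,
			      const struct sk_buff *skb,
			      u16 rxq_index, u32 flow_id)
{
	struct virtnet_info *vi = netdev_priv(dev);
	u16 slot = flow_id % VIRTNET_RFS_TABLE_SIZE;	/* invented table size */

	/* Remember which RX virtqueue this flow should be steered to. */
	vi->rfs_queue[slot] = rxq_index;

	/* RFS expects a non-negative filter id that it can later expire. */
	return slot;
}
#endif /* CONFIG_RFS_ACCEL */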

>>
>>> Stefan
>
>
>
> --
> Regards,
>
> Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-22 18:00           ` Tom Herbert
@ 2014-01-23  0:40             ` Zhi Yong Wu
  2014-01-23 14:23             ` Michael S. Tsirkin
  1 sibling, 0 replies; 32+ messages in thread
From: Zhi Yong Wu @ 2014-01-23  0:40 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Ben Hutchings, Stefan Hajnoczi, Linux Netdev List, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Michael S. Tsirkin, Rusty Russell,
	Jason Wang

On Thu, Jan 23, 2014 at 2:00 AM, Tom Herbert <therbert@google.com> wrote:
>>> 1. The aRFS interface for the guest to specify which virtual queue to
>>> receive a packet on is fairly straightforward.
>>> 2. To hook into RFS, we need to match the virtual queue to the real
>>> CPU it will be processed on, and then program the RFS table for that
>>> flow and CPU.
>>> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
>>> correct queue for the CPU.
>> Does anyone have time to draw a conclusion from this discussion? In
>> particular, how, in more detail, will an rx packet be steered up the
>> stack across the guest virtio_net driver, the virtio_net NIC, vhost_net,
>> the tun driver, the host network stack, and the physical NIC?
>> What is the role of each unit along that path? Otherwise this discussion
>> won't reach any meaningful result, which is not what we expect.
>>
> Working code outweighs theoretical discussion :-). I think you started
> on a good track with the original patches, and I believe the tun path
> should work pretty well (some performance numbers would still be good
I planned to run netperf in a KVM guest over the "vhost_net +
tun + OVS bridge" path, but it seems a bit difficult for me to get
hold of x86 hardware; the boxes I have are all Power arch.
> to validate). It seems there are enough hooks in the virtio_net path
> to implement something meaningful and maybe get some numbers (maybe
Can you describe in more detail what that "something meaningful" would
be? What roadmap do you expect for virtio_net aRFS support?
> ignore NIC aRFS in the first pass).
>
> Tom
>
>>>
>>>> Stefan
>>
>>
>>
>> --
>> Regards,
>>
>> Zhi Yong Wu



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Fwd: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-22 18:00           ` Tom Herbert
  2014-01-23  0:40             ` Zhi Yong Wu
@ 2014-01-23 14:23             ` Michael S. Tsirkin
  1 sibling, 0 replies; 32+ messages in thread
From: Michael S. Tsirkin @ 2014-01-23 14:23 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Zhi Yong Wu, Ben Hutchings, Stefan Hajnoczi, Linux Netdev List,
	Eric Dumazet, David S. Miller, Zhi Yong Wu, Rusty Russell,
	Jason Wang

On Wed, Jan 22, 2014 at 10:00:37AM -0800, Tom Herbert wrote:
> >> 1. The aRFS interface for the guest to specify which virtual queue to
> >> receive a packet on is fairly straightforward.
> >> 2. To hook into RFS, we need to match the virtual queue to the real
> >> CPU it will be processed on, and then program the RFS table for that
> >> flow and CPU.
> >> 3. NIC aRFS keys off the RFS tables so it can program the HW with the
> >> correct queue for the CPU.
> > Does anyone have time to draw a conclusion from this discussion? In
> > particular, how, in more detail, will an rx packet be steered up the
> > stack across the guest virtio_net driver, the virtio_net NIC, vhost_net,
> > the tun driver, the host network stack, and the physical NIC?
> > What is the role of each unit along that path? Otherwise this discussion
> > won't reach any meaningful result, which is not what we expect.
> >
> Working code outweighs theoretical discussion :-).

So far all that was posted was an untested patchset.
Zhi Yong Wu did this intentionally to get early feedback, so
it's not surprising we got a theoretical discussion
as opposed to working code out of this.

> I think you started
> on a good track with the original patches, and I believe the tun path
> should work pretty well (some performance numbers would still be good
> to validate).

Yes, merging performance-oriented patches without any
performance numbers to show for it is strange.

> It seems there are enough hooks in the virtio_net path
> to implement something meaningful and maybe get some numbers (maybe
> ignore NIC aRFS in the first pass).
> 
> Tom
> 
> >>
> >>> Stefan
> >
> >
> >
> > --
> > Regards,
> >
> > Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [RFC PATCH net-next 0/3] virtio_net: add aRFS support
  2014-01-16  8:48   ` Zhi Yong Wu
@ 2014-01-23 14:26     ` Michael S. Tsirkin
  0 siblings, 0 replies; 32+ messages in thread
From: Michael S. Tsirkin @ 2014-01-23 14:26 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Jason Wang, Linux Netdev List, Tom Herbert, Eric Dumazet,
	David S. Miller, Zhi Yong Wu, Stefan Hajnoczi, Rusty Russell

On Thu, Jan 16, 2014 at 04:48:43PM +0800, Zhi Yong Wu wrote:
> On Thu, Jan 16, 2014 at 12:23 PM, Jason Wang <jasowang@redhat.com> wrote:
> > On 01/15/2014 10:20 PM, Zhi Yong Wu wrote:
> >>
> >> From: Zhi Yong Wu<wuzhy@linux.vnet.ibm.com>
> >>
> >> HI, folks
> >>
> >> The patchset is trying to integrate aRFS support to virtio_net. In this
> >> case,
> >> aRFS will be used to select the RX queue. To make sure that it's going
> >> ahead
> >> in the correct direction, although it is still one RFC and isn't tested,
> >> it's
> >> post out ASAP. Any comment are appreciated, thanks.
> >>
> >> If anyone is interested in playing with it, you can get this patchset from
> >> my
> >> dev git on github:
> >>    git://github.com/wuzhy/kernel.git virtnet_rfs
> >>
> >> Zhi Yong Wu (3):
> >>    virtio_pci: Introduce one new config api vp_get_vq_irq()
> >>    virtio_net: Introduce one dummy function virtnet_filter_rfs()
> >>    virtio-net: Add accelerated RFS support
> >>
> >>   drivers/net/virtio_net.c      |   67
> >> ++++++++++++++++++++++++++++++++++++++++-
> >>   drivers/virtio/virtio_pci.c   |   11 +++++++
> >>   include/linux/virtio_config.h |   12 +++++++
> >>   3 files changed, 89 insertions(+), 1 deletions(-)
> >>
> >
> > Please run get_maintainer.pl before sending the patch. You should at
> > least cc the virtio maintainer/list for this.
> Is this a must for virtio stuff?

See Documentation/SubmittingPatches

> >
> > The core aRFS method is a no-op in this RFC, which makes this series hard
> > to discuss meaningfully. You should at least describe the big picture in
> > the cover letter. I suggest posting an RFC that actually runs and shows
> > the expected result, or simply starting a thread for the design
> > discussion.
> Yes, it currently misses some important pieces, as I said in another
> mail in this series.
> 
> >
> > This method has also been discussed before; search for "[net-next RFC
> > PATCH 5/5] virtio-net: flow director support" in the netdev archive for an
> > old prototype I implemented. It works, and it looks like most of what this
> > RFC does was already done there.
> Ah? Can you let me know the outcome of that discussion? I will check
> it; thanks for the pointer.
> >
> > A basic question is whether we need this at all; not all multiqueue cards
> > use aRFS (see ixgbe ATR). And does it bring extra overhead? For virtio we
> > want to reduce vmexits as much as possible, but this aRFS
> Good question; I also have concerns about this and don't know whether
> Tom has a good explanation.
> > seems to introduce many more of them. Adding a complex interface just for
> > a virtual device may not be good; a simple method may work for most
> > cases.
> >
> > We really should consider offloading this to the real NIC. VMDq and L2
> > forwarding offload may help in this case.
> 
> By the way, Stefan, can you let us know your concerns here, as we
> discussed on the IRC channel? :)
> 
> 
> -- 
> Regards,
> 
> Zhi Yong Wu

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2014-01-23 14:21 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-15 14:20 [RFC PATCH net-next 0/3] virtio_net: add aRFS support Zhi Yong Wu
2014-01-15 14:20 ` [RFC PATCH net-next 1/3] virtio_pci: Introduce one new config api vp_get_vq_irq() Zhi Yong Wu
2014-01-15 14:20 ` [RFC PATCH net-next 2/3] virtio_net: Introduce one dummy function virtnet_filter_rfs() Zhi Yong Wu
2014-01-15 17:54   ` Tom Herbert
2014-01-16  2:45     ` Zhi Yong Wu
2014-01-15 14:20 ` [RFC PATCH net-next 3/3] virtio-net: Add accelerated RFS support Zhi Yong Wu
2014-01-16 21:31   ` Ben Hutchings
2014-01-16 22:00     ` Zhi Yong Wu
2014-01-16 23:16       ` Ben Hutchings
2014-01-17 16:54         ` Zhi Yong Wu
2014-01-17 17:20           ` Ben Hutchings
2014-01-18  4:59             ` Tom Herbert
2014-01-18 14:19               ` Ben Hutchings
2014-01-16  4:23 ` [RFC PATCH net-next 0/3] virtio_net: add aRFS support Jason Wang
2014-01-16  8:34   ` Fwd: " Zhi Yong Wu
2014-01-16  8:52     ` Stefan Hajnoczi
2014-01-16 17:12       ` Tom Herbert
2014-01-17  3:26         ` Jason Wang
2014-01-17  5:08           ` Tom Herbert
2014-01-17  6:36             ` Jason Wang
2014-01-17 16:03               ` Tom Herbert
2014-01-17  5:22         ` Stefan Hajnoczi
2014-01-17  6:45           ` Jason Wang
2014-01-20 14:36           ` Ben Hutchings
2014-01-22 13:27         ` Zhi Yong Wu
2014-01-22 18:00           ` Tom Herbert
2014-01-23  0:40             ` Zhi Yong Wu
2014-01-23 14:23             ` Michael S. Tsirkin
2014-01-17  3:04       ` Jason Wang
2014-01-20 16:49         ` Stefan Hajnoczi
2014-01-16  8:48   ` Zhi Yong Wu
2014-01-23 14:26     ` Michael S. Tsirkin
