All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()
@ 2015-10-22  5:27 ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-22  5:27 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel; +Cc: Jason Wang

This path introduces a helper which can give a hint for whether or not
there's a work queued in the work list.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 6 ++++++
 drivers/vhost/vhost.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index eec2f11..d42d11e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
 
+bool vhost_has_work(struct vhost_dev *dev)
+{
+	return !list_empty(&dev->work_list);
+}
+EXPORT_SYMBOL_GPL(vhost_has_work);
+
 void vhost_poll_queue(struct vhost_poll *poll)
 {
 	vhost_work_queue(poll->dev, &poll->work);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 4772862..ea0327d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -37,6 +37,7 @@ struct vhost_poll {
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
+bool vhost_has_work(struct vhost_dev *dev);
 
 void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
 		     unsigned long mask, struct vhost_dev *dev);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()
@ 2015-10-22  5:27 ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-22  5:27 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel

This path introduces a helper which can give a hint for whether or not
there's a work queued in the work list.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 6 ++++++
 drivers/vhost/vhost.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index eec2f11..d42d11e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
 
+bool vhost_has_work(struct vhost_dev *dev)
+{
+	return !list_empty(&dev->work_list);
+}
+EXPORT_SYMBOL_GPL(vhost_has_work);
+
 void vhost_poll_queue(struct vhost_poll *poll)
 {
 	vhost_work_queue(poll->dev, &poll->work);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 4772862..ea0327d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -37,6 +37,7 @@ struct vhost_poll {
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
+bool vhost_has_work(struct vhost_dev *dev);
 
 void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
 		     unsigned long mask, struct vhost_dev *dev);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22  5:27 ` Jason Wang
@ 2015-10-22  5:27   ` Jason Wang
  -1 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-22  5:27 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel; +Cc: Jason Wang

This patch tries to poll for new added tx buffer for a while at the
end of tx processing. The maximum time spent on polling were limited
through a module parameter. To avoid block rx, the loop will end it
there's new other works queued on vhost so in fact socket receive
queue is also be polled.

busyloop_timeout = 50 gives us following improvement on TCP_RR test:

size/session/+thu%/+normalize%
    1/     1/   +5%/  -20%
    1/    50/  +17%/   +3%

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..bbb522a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -31,7 +31,9 @@
 #include "vhost.h"
 
 static int experimental_zcopytx = 1;
+static int busyloop_timeout = 50;
 module_param(experimental_zcopytx, int, 0444);
+module_param(busyloop_timeout, int, 0444);
 MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
 		                       " 1 -Enable; 0 - Disable");
 
@@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 	rcu_read_unlock_bh();
 }
 
+static bool tx_can_busy_poll(struct vhost_dev *dev,
+			     unsigned long endtime)
+{
+	unsigned long now = local_clock() >> 10;
+
+	return busyloop_timeout && !need_resched() &&
+	       !time_after(now, endtime) && !vhost_has_work(dev) &&
+	       single_task_running();
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	unsigned long endtime;
 	unsigned out, in;
 	int head;
 	struct msghdr msg = {
@@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
 			      % UIO_MAXIOV == nvq->done_idx))
 			break;
 
+		endtime  = (local_clock() >> 10) + busyloop_timeout;
+again:
 		head = vhost_get_vq_desc(vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
@@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
 			break;
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
+			if (tx_can_busy_poll(vq->dev, endtime)) {
+				cpu_relax();
+				goto again;
+			}
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH net-next RFC 2/2] vhost_net: basic polling support
@ 2015-10-22  5:27   ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-22  5:27 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel

This patch tries to poll for new added tx buffer for a while at the
end of tx processing. The maximum time spent on polling were limited
through a module parameter. To avoid block rx, the loop will end it
there's new other works queued on vhost so in fact socket receive
queue is also be polled.

busyloop_timeout = 50 gives us following improvement on TCP_RR test:

size/session/+thu%/+normalize%
    1/     1/   +5%/  -20%
    1/    50/  +17%/   +3%

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..bbb522a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -31,7 +31,9 @@
 #include "vhost.h"
 
 static int experimental_zcopytx = 1;
+static int busyloop_timeout = 50;
 module_param(experimental_zcopytx, int, 0444);
+module_param(busyloop_timeout, int, 0444);
 MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
 		                       " 1 -Enable; 0 - Disable");
 
@@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 	rcu_read_unlock_bh();
 }
 
+static bool tx_can_busy_poll(struct vhost_dev *dev,
+			     unsigned long endtime)
+{
+	unsigned long now = local_clock() >> 10;
+
+	return busyloop_timeout && !need_resched() &&
+	       !time_after(now, endtime) && !vhost_has_work(dev) &&
+	       single_task_running();
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	unsigned long endtime;
 	unsigned out, in;
 	int head;
 	struct msghdr msg = {
@@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
 			      % UIO_MAXIOV == nvq->done_idx))
 			break;
 
+		endtime  = (local_clock() >> 10) + busyloop_timeout;
+again:
 		head = vhost_get_vq_desc(vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
@@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
 			break;
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
+			if (tx_can_busy_poll(vq->dev, endtime)) {
+				cpu_relax();
+				goto again;
+			}
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()
  2015-10-22  5:27 ` Jason Wang
@ 2015-10-22  8:38   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-22  8:38 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Thu, Oct 22, 2015 at 01:27:28AM -0400, Jason Wang wrote:
> This path introduces a helper which can give a hint for whether or not
> there's a work queued in the work list.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/vhost/vhost.c | 6 ++++++
>  drivers/vhost/vhost.h | 1 +
>  2 files changed, 7 insertions(+)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index eec2f11..d42d11e 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>  }
>  EXPORT_SYMBOL_GPL(vhost_work_queue);
>  
> +bool vhost_has_work(struct vhost_dev *dev)
> +{
> +	return !list_empty(&dev->work_list);
> +}
> +EXPORT_SYMBOL_GPL(vhost_has_work);
> +
>  void vhost_poll_queue(struct vhost_poll *poll)
>  {
>  	vhost_work_queue(poll->dev, &poll->work);


This doesn't take a lock so it's unreliable.
I think it's ok in this case since it's just
an optimization - but pls document this.

> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 4772862..ea0327d 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -37,6 +37,7 @@ struct vhost_poll {
>  
>  void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
>  void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
> +bool vhost_has_work(struct vhost_dev *dev);
>  
>  void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
>  		     unsigned long mask, struct vhost_dev *dev);
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()
@ 2015-10-22  8:38   ` Michael S. Tsirkin
  0 siblings, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-22  8:38 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization

On Thu, Oct 22, 2015 at 01:27:28AM -0400, Jason Wang wrote:
> This path introduces a helper which can give a hint for whether or not
> there's a work queued in the work list.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/vhost/vhost.c | 6 ++++++
>  drivers/vhost/vhost.h | 1 +
>  2 files changed, 7 insertions(+)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index eec2f11..d42d11e 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>  }
>  EXPORT_SYMBOL_GPL(vhost_work_queue);
>  
> +bool vhost_has_work(struct vhost_dev *dev)
> +{
> +	return !list_empty(&dev->work_list);
> +}
> +EXPORT_SYMBOL_GPL(vhost_has_work);
> +
>  void vhost_poll_queue(struct vhost_poll *poll)
>  {
>  	vhost_work_queue(poll->dev, &poll->work);


This doesn't take a lock so it's unreliable.
I think it's ok in this case since it's just
an optimization - but pls document this.

> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 4772862..ea0327d 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -37,6 +37,7 @@ struct vhost_poll {
>  
>  void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
>  void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
> +bool vhost_has_work(struct vhost_dev *dev);
>  
>  void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
>  		     unsigned long mask, struct vhost_dev *dev);
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22  5:27   ` Jason Wang
@ 2015-10-22  9:33     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-22  9:33 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
> This patch tries to poll for new added tx buffer for a while at the
> end of tx processing. The maximum time spent on polling were limited
> through a module parameter. To avoid block rx, the loop will end it
> there's new other works queued on vhost so in fact socket receive
> queue is also be polled.
> 
> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
> 
> size/session/+thu%/+normalize%
>     1/     1/   +5%/  -20%
>     1/    50/  +17%/   +3%

Is there a measureable increase in cpu utilization
with busyloop_timeout = 0?

> Signed-off-by: Jason Wang <jasowang@redhat.com>

We might be able to shave off the minor regression
by careful use of likely/unlikely, or maybe
deferring 

> ---
>  drivers/vhost/net.c | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 9eda69e..bbb522a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -31,7 +31,9 @@
>  #include "vhost.h"
>  
>  static int experimental_zcopytx = 1;
> +static int busyloop_timeout = 50;
>  module_param(experimental_zcopytx, int, 0444);
> +module_param(busyloop_timeout, int, 0444);

Pls add a description, including the units and the special
value 0.

>  MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
>  		                       " 1 -Enable; 0 - Disable");
>  
> @@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>  	rcu_read_unlock_bh();
>  }
>  
> +static bool tx_can_busy_poll(struct vhost_dev *dev,
> +			     unsigned long endtime)
> +{
> +	unsigned long now = local_clock() >> 10;

local_clock might go backwards if we jump between CPUs.
One way to fix would be to record the CPU id and break
out of loop if that changes.

Also - defer this until we actually know we need it?

> +
> +	return busyloop_timeout && !need_resched() &&
> +	       !time_after(now, endtime) && !vhost_has_work(dev) &&
> +	       single_task_running();

signal pending as well?

> +}
> +
>  /* Expects to be always run from workqueue - which acts as
>   * read-size critical section for our kind of RCU. */
>  static void handle_tx(struct vhost_net *net)
>  {
>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
>  	struct vhost_virtqueue *vq = &nvq->vq;
> +	unsigned long endtime;
>  	unsigned out, in;
>  	int head;
>  	struct msghdr msg = {
> @@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
>  			      % UIO_MAXIOV == nvq->done_idx))
>  			break;
>  
> +		endtime  = (local_clock() >> 10) + busyloop_timeout;
> +again:
>  		head = vhost_get_vq_desc(vq, vq->iov,
>  					 ARRAY_SIZE(vq->iov),
>  					 &out, &in,
> @@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
>  			break;
>  		/* Nothing new?  Wait for eventfd to tell us they refilled. */
>  		if (head == vq->num) {
> +			if (tx_can_busy_poll(vq->dev, endtime)) {
> +				cpu_relax();
> +				goto again;
> +			}
>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
>  				vhost_disable_notify(&net->dev, vq);
>  				continue;
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
@ 2015-10-22  9:33     ` Michael S. Tsirkin
  0 siblings, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-22  9:33 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization

On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
> This patch tries to poll for new added tx buffer for a while at the
> end of tx processing. The maximum time spent on polling were limited
> through a module parameter. To avoid block rx, the loop will end it
> there's new other works queued on vhost so in fact socket receive
> queue is also be polled.
> 
> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
> 
> size/session/+thu%/+normalize%
>     1/     1/   +5%/  -20%
>     1/    50/  +17%/   +3%

Is there a measureable increase in cpu utilization
with busyloop_timeout = 0?

> Signed-off-by: Jason Wang <jasowang@redhat.com>

We might be able to shave off the minor regression
by careful use of likely/unlikely, or maybe
deferring 

> ---
>  drivers/vhost/net.c | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 9eda69e..bbb522a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -31,7 +31,9 @@
>  #include "vhost.h"
>  
>  static int experimental_zcopytx = 1;
> +static int busyloop_timeout = 50;
>  module_param(experimental_zcopytx, int, 0444);
> +module_param(busyloop_timeout, int, 0444);

Pls add a description, including the units and the special
value 0.

>  MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
>  		                       " 1 -Enable; 0 - Disable");
>  
> @@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>  	rcu_read_unlock_bh();
>  }
>  
> +static bool tx_can_busy_poll(struct vhost_dev *dev,
> +			     unsigned long endtime)
> +{
> +	unsigned long now = local_clock() >> 10;

local_clock might go backwards if we jump between CPUs.
One way to fix would be to record the CPU id and break
out of loop if that changes.

Also - defer this until we actually know we need it?

> +
> +	return busyloop_timeout && !need_resched() &&
> +	       !time_after(now, endtime) && !vhost_has_work(dev) &&
> +	       single_task_running();

signal pending as well?

> +}
> +
>  /* Expects to be always run from workqueue - which acts as
>   * read-size critical section for our kind of RCU. */
>  static void handle_tx(struct vhost_net *net)
>  {
>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
>  	struct vhost_virtqueue *vq = &nvq->vq;
> +	unsigned long endtime;
>  	unsigned out, in;
>  	int head;
>  	struct msghdr msg = {
> @@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
>  			      % UIO_MAXIOV == nvq->done_idx))
>  			break;
>  
> +		endtime  = (local_clock() >> 10) + busyloop_timeout;
> +again:
>  		head = vhost_get_vq_desc(vq, vq->iov,
>  					 ARRAY_SIZE(vq->iov),
>  					 &out, &in,
> @@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
>  			break;
>  		/* Nothing new?  Wait for eventfd to tell us they refilled. */
>  		if (head == vq->num) {
> +			if (tx_can_busy_poll(vq->dev, endtime)) {
> +				cpu_relax();
> +				goto again;
> +			}
>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
>  				vhost_disable_notify(&net->dev, vq);
>  				continue;
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22  9:33     ` Michael S. Tsirkin
  (?)
  (?)
@ 2015-10-22 15:46     ` Rick Jones
  2015-10-22 16:16       ` Michael S. Tsirkin
  2015-10-22 16:16       ` Michael S. Tsirkin
  -1 siblings, 2 replies; 20+ messages in thread
From: Rick Jones @ 2015-10-22 15:46 UTC (permalink / raw)
  To: Michael S. Tsirkin, Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On 10/22/2015 02:33 AM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
>> This patch tries to poll for new added tx buffer for a while at the
>> end of tx processing. The maximum time spent on polling were limited
>> through a module parameter. To avoid block rx, the loop will end it
>> there's new other works queued on vhost so in fact socket receive
>> queue is also be polled.
>>
>> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
>>
>> size/session/+thu%/+normalize%
>>      1/     1/   +5%/  -20%
>>      1/    50/  +17%/   +3%
>
> Is there a measureable increase in cpu utilization
> with busyloop_timeout = 0?

And since a netperf TCP_RR test is involved, be careful about what 
netperf reports for CPU util if that increase isn't in the context of 
the guest OS.

For completeness, looking at the effect on TCP_STREAM and TCP_MAERTS, 
aggregate _RR and even aggregate _RR/packets per second for many VMs on 
the same system would be in order.

happy benchmarking,

rick jones


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22  9:33     ` Michael S. Tsirkin
  (?)
@ 2015-10-22 15:46     ` Rick Jones
  -1 siblings, 0 replies; 20+ messages in thread
From: Rick Jones @ 2015-10-22 15:46 UTC (permalink / raw)
  To: Michael S. Tsirkin, Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization

On 10/22/2015 02:33 AM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
>> This patch tries to poll for new added tx buffer for a while at the
>> end of tx processing. The maximum time spent on polling were limited
>> through a module parameter. To avoid block rx, the loop will end it
>> there's new other works queued on vhost so in fact socket receive
>> queue is also be polled.
>>
>> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
>>
>> size/session/+thu%/+normalize%
>>      1/     1/   +5%/  -20%
>>      1/    50/  +17%/   +3%
>
> Is there a measureable increase in cpu utilization
> with busyloop_timeout = 0?

And since a netperf TCP_RR test is involved, be careful about what 
netperf reports for CPU util if that increase isn't in the context of 
the guest OS.

For completeness, looking at the effect on TCP_STREAM and TCP_MAERTS, 
aggregate _RR and even aggregate _RR/packets per second for many VMs on 
the same system would be in order.

happy benchmarking,

rick jones

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22 15:46     ` Rick Jones
@ 2015-10-22 16:16       ` Michael S. Tsirkin
  2015-10-23  7:14           ` Jason Wang
  2015-10-22 16:16       ` Michael S. Tsirkin
  1 sibling, 1 reply; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-22 16:16 UTC (permalink / raw)
  To: Rick Jones; +Cc: Jason Wang, kvm, virtualization, netdev, linux-kernel

On Thu, Oct 22, 2015 at 08:46:33AM -0700, Rick Jones wrote:
> On 10/22/2015 02:33 AM, Michael S. Tsirkin wrote:
> >On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
> >>This patch tries to poll for new added tx buffer for a while at the
> >>end of tx processing. The maximum time spent on polling were limited
> >>through a module parameter. To avoid block rx, the loop will end it
> >>there's new other works queued on vhost so in fact socket receive
> >>queue is also be polled.
> >>
> >>busyloop_timeout = 50 gives us following improvement on TCP_RR test:
> >>
> >>size/session/+thu%/+normalize%
> >>     1/     1/   +5%/  -20%
> >>     1/    50/  +17%/   +3%
> >
> >Is there a measureable increase in cpu utilization
> >with busyloop_timeout = 0?
> 
> And since a netperf TCP_RR test is involved, be careful about what netperf
> reports for CPU util if that increase isn't in the context of the guest OS.
> 
> For completeness, looking at the effect on TCP_STREAM and TCP_MAERTS,
> aggregate _RR and even aggregate _RR/packets per second for many VMs on the
> same system would be in order.
> 
> happy benchmarking,
> 
> rick jones

Absolutely, merging a new kernel API just for a specific
benchmark doesn't make sense.
I'm guessing this is just an early RFC, a fuller submission
will probably include more numbers.

-- 
MST

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22 15:46     ` Rick Jones
  2015-10-22 16:16       ` Michael S. Tsirkin
@ 2015-10-22 16:16       ` Michael S. Tsirkin
  1 sibling, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-22 16:16 UTC (permalink / raw)
  To: Rick Jones; +Cc: netdev, linux-kernel, kvm, virtualization

On Thu, Oct 22, 2015 at 08:46:33AM -0700, Rick Jones wrote:
> On 10/22/2015 02:33 AM, Michael S. Tsirkin wrote:
> >On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
> >>This patch tries to poll for new added tx buffer for a while at the
> >>end of tx processing. The maximum time spent on polling were limited
> >>through a module parameter. To avoid block rx, the loop will end it
> >>there's new other works queued on vhost so in fact socket receive
> >>queue is also be polled.
> >>
> >>busyloop_timeout = 50 gives us following improvement on TCP_RR test:
> >>
> >>size/session/+thu%/+normalize%
> >>     1/     1/   +5%/  -20%
> >>     1/    50/  +17%/   +3%
> >
> >Is there a measureable increase in cpu utilization
> >with busyloop_timeout = 0?
> 
> And since a netperf TCP_RR test is involved, be careful about what netperf
> reports for CPU util if that increase isn't in the context of the guest OS.
> 
> For completeness, looking at the effect on TCP_STREAM and TCP_MAERTS,
> aggregate _RR and even aggregate _RR/packets per second for many VMs on the
> same system would be in order.
> 
> happy benchmarking,
> 
> rick jones

Absolutely, merging a new kernel API just for a specific
benchmark doesn't make sense.
I'm guessing this is just an early RFC, a fuller submission
will probably include more numbers.

-- 
MST

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()
  2015-10-22  8:38   ` Michael S. Tsirkin
@ 2015-10-23  7:10     ` Jason Wang
  -1 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-23  7:10 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel



On 10/22/2015 04:38 PM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 01:27:28AM -0400, Jason Wang wrote:
>> > This path introduces a helper which can give a hint for whether or not
>> > there's a work queued in the work list.
>> > 
>> > Signed-off-by: Jason Wang <jasowang@redhat.com>
>> > ---
>> >  drivers/vhost/vhost.c | 6 ++++++
>> >  drivers/vhost/vhost.h | 1 +
>> >  2 files changed, 7 insertions(+)
>> > 
>> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> > index eec2f11..d42d11e 100644
>> > --- a/drivers/vhost/vhost.c
>> > +++ b/drivers/vhost/vhost.c
>> > @@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>> >  }
>> >  EXPORT_SYMBOL_GPL(vhost_work_queue);
>> >  
>> > +bool vhost_has_work(struct vhost_dev *dev)
>> > +{
>> > +	return !list_empty(&dev->work_list);
>> > +}
>> > +EXPORT_SYMBOL_GPL(vhost_has_work);
>> > +
>> >  void vhost_poll_queue(struct vhost_poll *poll)
>> >  {
>> >  	vhost_work_queue(poll->dev, &poll->work);
> This doesn't take a lock so it's unreliable.
> I think it's ok in this case since it's just
> an optimization - but pls document this.
>

Ok, will do.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()
@ 2015-10-23  7:10     ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-23  7:10 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization



On 10/22/2015 04:38 PM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 01:27:28AM -0400, Jason Wang wrote:
>> > This path introduces a helper which can give a hint for whether or not
>> > there's a work queued in the work list.
>> > 
>> > Signed-off-by: Jason Wang <jasowang@redhat.com>
>> > ---
>> >  drivers/vhost/vhost.c | 6 ++++++
>> >  drivers/vhost/vhost.h | 1 +
>> >  2 files changed, 7 insertions(+)
>> > 
>> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> > index eec2f11..d42d11e 100644
>> > --- a/drivers/vhost/vhost.c
>> > +++ b/drivers/vhost/vhost.c
>> > @@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>> >  }
>> >  EXPORT_SYMBOL_GPL(vhost_work_queue);
>> >  
>> > +bool vhost_has_work(struct vhost_dev *dev)
>> > +{
>> > +	return !list_empty(&dev->work_list);
>> > +}
>> > +EXPORT_SYMBOL_GPL(vhost_has_work);
>> > +
>> >  void vhost_poll_queue(struct vhost_poll *poll)
>> >  {
>> >  	vhost_work_queue(poll->dev, &poll->work);
> This doesn't take a lock so it's unreliable.
> I think it's ok in this case since it's just
> an optimization - but pls document this.
>

Ok, will do.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22  9:33     ` Michael S. Tsirkin
                       ` (3 preceding siblings ...)
  (?)
@ 2015-10-23  7:13     ` Jason Wang
  2015-10-23 13:39       ` Michael S. Tsirkin
  2015-10-23 13:39       ` Michael S. Tsirkin
  -1 siblings, 2 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-23  7:13 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel



On 10/22/2015 05:33 PM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
>> This patch tries to poll for new added tx buffer for a while at the
>> end of tx processing. The maximum time spent on polling were limited
>> through a module parameter. To avoid block rx, the loop will end it
>> there's new other works queued on vhost so in fact socket receive
>> queue is also be polled.
>>
>> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
>>
>> size/session/+thu%/+normalize%
>>     1/     1/   +5%/  -20%
>>     1/    50/  +17%/   +3%
> Is there a measureable increase in cpu utilization
> with busyloop_timeout = 0?

Just run TCP_RR, no increasing. Will run a complete test on next version.

>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> We might be able to shave off the minor regression
> by careful use of likely/unlikely, or maybe
> deferring 

Yes, but what did "deferring" mean here?
 
>
>> ---
>>  drivers/vhost/net.c | 19 +++++++++++++++++++
>>  1 file changed, 19 insertions(+)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index 9eda69e..bbb522a 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -31,7 +31,9 @@
>>  #include "vhost.h"
>>  
>>  static int experimental_zcopytx = 1;
>> +static int busyloop_timeout = 50;
>>  module_param(experimental_zcopytx, int, 0444);
>> +module_param(busyloop_timeout, int, 0444);
> Pls add a description, including the units and the special
> value 0.

Ok.

>
>>  MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
>>  		                       " 1 -Enable; 0 - Disable");
>>  
>> @@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>>  	rcu_read_unlock_bh();
>>  }
>>  
>> +static bool tx_can_busy_poll(struct vhost_dev *dev,
>> +			     unsigned long endtime)
>> +{
>> +	unsigned long now = local_clock() >> 10;
> local_clock might go backwards if we jump between CPUs.
> One way to fix would be to record the CPU id and break
> out of loop if that changes.

Right, or maybe disable preemption in this case?

>
> Also - defer this until we actually know we need it?

Right.

>
>> +
>> +	return busyloop_timeout && !need_resched() &&
>> +	       !time_after(now, endtime) && !vhost_has_work(dev) &&
>> +	       single_task_running();
> signal pending as well?

Yes.

>> +}
>> +
>>  /* Expects to be always run from workqueue - which acts as
>>   * read-size critical section for our kind of RCU. */
>>  static void handle_tx(struct vhost_net *net)
>>  {
>>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
>>  	struct vhost_virtqueue *vq = &nvq->vq;
>> +	unsigned long endtime;
>>  	unsigned out, in;
>>  	int head;
>>  	struct msghdr msg = {
>> @@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
>>  			      % UIO_MAXIOV == nvq->done_idx))
>>  			break;
>>  
>> +		endtime  = (local_clock() >> 10) + busyloop_timeout;
>> +again:
>>  		head = vhost_get_vq_desc(vq, vq->iov,
>>  					 ARRAY_SIZE(vq->iov),
>>  					 &out, &in,
>> @@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
>>  			break;
>>  		/* Nothing new?  Wait for eventfd to tell us they refilled. */
>>  		if (head == vq->num) {
>> +			if (tx_can_busy_poll(vq->dev, endtime)) {
>> +				cpu_relax();
>> +				goto again;
>> +			}
>>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
>>  				vhost_disable_notify(&net->dev, vq);
>>  				continue;
>> -- 
>> 1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22  9:33     ` Michael S. Tsirkin
                       ` (2 preceding siblings ...)
  (?)
@ 2015-10-23  7:13     ` Jason Wang
  -1 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-23  7:13 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization



On 10/22/2015 05:33 PM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
>> This patch tries to poll for new added tx buffer for a while at the
>> end of tx processing. The maximum time spent on polling were limited
>> through a module parameter. To avoid block rx, the loop will end it
>> there's new other works queued on vhost so in fact socket receive
>> queue is also be polled.
>>
>> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
>>
>> size/session/+thu%/+normalize%
>>     1/     1/   +5%/  -20%
>>     1/    50/  +17%/   +3%
> Is there a measureable increase in cpu utilization
> with busyloop_timeout = 0?

Just run TCP_RR, no increasing. Will run a complete test on next version.

>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> We might be able to shave off the minor regression
> by careful use of likely/unlikely, or maybe
> deferring 

Yes, but what did "deferring" mean here?
 
>
>> ---
>>  drivers/vhost/net.c | 19 +++++++++++++++++++
>>  1 file changed, 19 insertions(+)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index 9eda69e..bbb522a 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -31,7 +31,9 @@
>>  #include "vhost.h"
>>  
>>  static int experimental_zcopytx = 1;
>> +static int busyloop_timeout = 50;
>>  module_param(experimental_zcopytx, int, 0444);
>> +module_param(busyloop_timeout, int, 0444);
> Pls add a description, including the units and the special
> value 0.

Ok.

>
>>  MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
>>  		                       " 1 -Enable; 0 - Disable");
>>  
>> @@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>>  	rcu_read_unlock_bh();
>>  }
>>  
>> +static bool tx_can_busy_poll(struct vhost_dev *dev,
>> +			     unsigned long endtime)
>> +{
>> +	unsigned long now = local_clock() >> 10;
> local_clock might go backwards if we jump between CPUs.
> One way to fix would be to record the CPU id and break
> out of loop if that changes.

Right, or maybe disable preemption in this case?

>
> Also - defer this until we actually know we need it?

Right.

>
>> +
>> +	return busyloop_timeout && !need_resched() &&
>> +	       !time_after(now, endtime) && !vhost_has_work(dev) &&
>> +	       single_task_running();
> signal pending as well?

Yes.

>> +}
>> +
>>  /* Expects to be always run from workqueue - which acts as
>>   * read-size critical section for our kind of RCU. */
>>  static void handle_tx(struct vhost_net *net)
>>  {
>>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
>>  	struct vhost_virtqueue *vq = &nvq->vq;
>> +	unsigned long endtime;
>>  	unsigned out, in;
>>  	int head;
>>  	struct msghdr msg = {
>> @@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
>>  			      % UIO_MAXIOV == nvq->done_idx))
>>  			break;
>>  
>> +		endtime  = (local_clock() >> 10) + busyloop_timeout;
>> +again:
>>  		head = vhost_get_vq_desc(vq, vq->iov,
>>  					 ARRAY_SIZE(vq->iov),
>>  					 &out, &in,
>> @@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
>>  			break;
>>  		/* Nothing new?  Wait for eventfd to tell us they refilled. */
>>  		if (head == vq->num) {
>> +			if (tx_can_busy_poll(vq->dev, endtime)) {
>> +				cpu_relax();
>> +				goto again;
>> +			}
>>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
>>  				vhost_disable_notify(&net->dev, vq);
>>  				continue;
>> -- 
>> 1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-22 16:16       ` Michael S. Tsirkin
@ 2015-10-23  7:14           ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-23  7:14 UTC (permalink / raw)
  To: Michael S. Tsirkin, Rick Jones; +Cc: kvm, virtualization, netdev, linux-kernel



On 10/23/2015 12:16 AM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 08:46:33AM -0700, Rick Jones wrote:
>> On 10/22/2015 02:33 AM, Michael S. Tsirkin wrote:
>>> On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
>>>> This patch tries to poll for new added tx buffer for a while at the
>>>> end of tx processing. The maximum time spent on polling were limited
>>>> through a module parameter. To avoid block rx, the loop will end it
>>>> there's new other works queued on vhost so in fact socket receive
>>>> queue is also be polled.
>>>>
>>>> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
>>>>
>>>> size/session/+thu%/+normalize%
>>>>     1/     1/   +5%/  -20%
>>>>     1/    50/  +17%/   +3%
>>> Is there a measureable increase in cpu utilization
>>> with busyloop_timeout = 0?
>> And since a netperf TCP_RR test is involved, be careful about what netperf
>> reports for CPU util if that increase isn't in the context of the guest OS.

Right, the cpu utilization is measured on host.

>>
>> For completeness, looking at the effect on TCP_STREAM and TCP_MAERTS,
>> aggregate _RR and even aggregate _RR/packets per second for many VMs on the
>> same system would be in order.
>>
>> happy benchmarking,
>>
>> rick jones
> Absolutely, merging a new kernel API just for a specific
> benchmark doesn't make sense.
> I'm guessing this is just an early RFC, a fuller submission
> will probably include more numbers.
>

Yes, will run more complete tests.

Thanks


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
@ 2015-10-23  7:14           ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2015-10-23  7:14 UTC (permalink / raw)
  To: Michael S. Tsirkin, Rick Jones; +Cc: netdev, linux-kernel, kvm, virtualization



On 10/23/2015 12:16 AM, Michael S. Tsirkin wrote:
> On Thu, Oct 22, 2015 at 08:46:33AM -0700, Rick Jones wrote:
>> On 10/22/2015 02:33 AM, Michael S. Tsirkin wrote:
>>> On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
>>>> This patch tries to poll for new added tx buffer for a while at the
>>>> end of tx processing. The maximum time spent on polling were limited
>>>> through a module parameter. To avoid block rx, the loop will end it
>>>> there's new other works queued on vhost so in fact socket receive
>>>> queue is also be polled.
>>>>
>>>> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
>>>>
>>>> size/session/+thu%/+normalize%
>>>>     1/     1/   +5%/  -20%
>>>>     1/    50/  +17%/   +3%
>>> Is there a measureable increase in cpu utilization
>>> with busyloop_timeout = 0?
>> And since a netperf TCP_RR test is involved, be careful about what netperf
>> reports for CPU util if that increase isn't in the context of the guest OS.

Right, the cpu utilization is measured on host.

>>
>> For completeness, looking at the effect on TCP_STREAM and TCP_MAERTS,
>> aggregate _RR and even aggregate _RR/packets per second for many VMs on the
>> same system would be in order.
>>
>> happy benchmarking,
>>
>> rick jones
> Absolutely, merging a new kernel API just for a specific
> benchmark doesn't make sense.
> I'm guessing this is just an early RFC, a fuller submission
> will probably include more numbers.
>

Yes, will run more complete tests.

Thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-23  7:13     ` Jason Wang
@ 2015-10-23 13:39       ` Michael S. Tsirkin
  2015-10-23 13:39       ` Michael S. Tsirkin
  1 sibling, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-23 13:39 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Fri, Oct 23, 2015 at 03:13:07PM +0800, Jason Wang wrote:
> 
> 
> On 10/22/2015 05:33 PM, Michael S. Tsirkin wrote:
> > On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
> >> This patch tries to poll for new added tx buffer for a while at the
> >> end of tx processing. The maximum time spent on polling were limited
> >> through a module parameter. To avoid block rx, the loop will end it
> >> there's new other works queued on vhost so in fact socket receive
> >> queue is also be polled.
> >>
> >> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
> >>
> >> size/session/+thu%/+normalize%
> >>     1/     1/   +5%/  -20%
> >>     1/    50/  +17%/   +3%
> > Is there a measureable increase in cpu utilization
> > with busyloop_timeout = 0?
> 
> Just run TCP_RR, no increasing. Will run a complete test on next version.
> 
> >
> >> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > We might be able to shave off the minor regression
> > by careful use of likely/unlikely, or maybe
> > deferring 
> 
> Yes, but what did "deferring" mean here?

Don't call local_clock until we know we'll need it.

> >
> >> ---
> >>  drivers/vhost/net.c | 19 +++++++++++++++++++
> >>  1 file changed, 19 insertions(+)
> >>
> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >> index 9eda69e..bbb522a 100644
> >> --- a/drivers/vhost/net.c
> >> +++ b/drivers/vhost/net.c
> >> @@ -31,7 +31,9 @@
> >>  #include "vhost.h"
> >>  
> >>  static int experimental_zcopytx = 1;
> >> +static int busyloop_timeout = 50;
> >>  module_param(experimental_zcopytx, int, 0444);
> >> +module_param(busyloop_timeout, int, 0444);
> > Pls add a description, including the units and the special
> > value 0.
> 
> Ok.
> 
> >
> >>  MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
> >>  		                       " 1 -Enable; 0 - Disable");
> >>  
> >> @@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
> >>  	rcu_read_unlock_bh();
> >>  }
> >>  
> >> +static bool tx_can_busy_poll(struct vhost_dev *dev,
> >> +			     unsigned long endtime)
> >> +{
> >> +	unsigned long now = local_clock() >> 10;
> > local_clock might go backwards if we jump between CPUs.
> > One way to fix would be to record the CPU id and break
> > out of loop if that changes.
> 
> Right, or maybe disable preemption in this case?
> 
> >
> > Also - defer this until we actually know we need it?
> 
> Right.
> 
> >
> >> +
> >> +	return busyloop_timeout && !need_resched() &&
> >> +	       !time_after(now, endtime) && !vhost_has_work(dev) &&
> >> +	       single_task_running();
> > signal pending as well?
> 
> Yes.
> 
> >> +}
> >> +
> >>  /* Expects to be always run from workqueue - which acts as
> >>   * read-size critical section for our kind of RCU. */
> >>  static void handle_tx(struct vhost_net *net)
> >>  {
> >>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> >>  	struct vhost_virtqueue *vq = &nvq->vq;
> >> +	unsigned long endtime;
> >>  	unsigned out, in;
> >>  	int head;
> >>  	struct msghdr msg = {
> >> @@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
> >>  			      % UIO_MAXIOV == nvq->done_idx))
> >>  			break;
> >>  
> >> +		endtime  = (local_clock() >> 10) + busyloop_timeout;
> >> +again:
> >>  		head = vhost_get_vq_desc(vq, vq->iov,
> >>  					 ARRAY_SIZE(vq->iov),
> >>  					 &out, &in,
> >> @@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
> >>  			break;
> >>  		/* Nothing new?  Wait for eventfd to tell us they refilled. */
> >>  		if (head == vq->num) {
> >> +			if (tx_can_busy_poll(vq->dev, endtime)) {
> >> +				cpu_relax();
> >> +				goto again;
> >> +			}
> >>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> >>  				vhost_disable_notify(&net->dev, vq);
> >>  				continue;
> >> -- 
> >> 1.8.3.1
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next RFC 2/2] vhost_net: basic polling support
  2015-10-23  7:13     ` Jason Wang
  2015-10-23 13:39       ` Michael S. Tsirkin
@ 2015-10-23 13:39       ` Michael S. Tsirkin
  1 sibling, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2015-10-23 13:39 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization

On Fri, Oct 23, 2015 at 03:13:07PM +0800, Jason Wang wrote:
> 
> 
> On 10/22/2015 05:33 PM, Michael S. Tsirkin wrote:
> > On Thu, Oct 22, 2015 at 01:27:29AM -0400, Jason Wang wrote:
> >> This patch tries to poll for new added tx buffer for a while at the
> >> end of tx processing. The maximum time spent on polling were limited
> >> through a module parameter. To avoid block rx, the loop will end it
> >> there's new other works queued on vhost so in fact socket receive
> >> queue is also be polled.
> >>
> >> busyloop_timeout = 50 gives us following improvement on TCP_RR test:
> >>
> >> size/session/+thu%/+normalize%
> >>     1/     1/   +5%/  -20%
> >>     1/    50/  +17%/   +3%
> > Is there a measureable increase in cpu utilization
> > with busyloop_timeout = 0?
> 
> Just run TCP_RR, no increasing. Will run a complete test on next version.
> 
> >
> >> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > We might be able to shave off the minor regression
> > by careful use of likely/unlikely, or maybe
> > deferring 
> 
> Yes, but what did "deferring" mean here?

Don't call local_clock until we know we'll need it.

> >
> >> ---
> >>  drivers/vhost/net.c | 19 +++++++++++++++++++
> >>  1 file changed, 19 insertions(+)
> >>
> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >> index 9eda69e..bbb522a 100644
> >> --- a/drivers/vhost/net.c
> >> +++ b/drivers/vhost/net.c
> >> @@ -31,7 +31,9 @@
> >>  #include "vhost.h"
> >>  
> >>  static int experimental_zcopytx = 1;
> >> +static int busyloop_timeout = 50;
> >>  module_param(experimental_zcopytx, int, 0444);
> >> +module_param(busyloop_timeout, int, 0444);
> > Pls add a description, including the units and the special
> > value 0.
> 
> Ok.
> 
> >
> >>  MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
> >>  		                       " 1 -Enable; 0 - Disable");
> >>  
> >> @@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
> >>  	rcu_read_unlock_bh();
> >>  }
> >>  
> >> +static bool tx_can_busy_poll(struct vhost_dev *dev,
> >> +			     unsigned long endtime)
> >> +{
> >> +	unsigned long now = local_clock() >> 10;
> > local_clock might go backwards if we jump between CPUs.
> > One way to fix would be to record the CPU id and break
> > out of loop if that changes.
> 
> Right, or maybe disable preemption in this case?
> 
> >
> > Also - defer this until we actually know we need it?
> 
> Right.
> 
> >
> >> +
> >> +	return busyloop_timeout && !need_resched() &&
> >> +	       !time_after(now, endtime) && !vhost_has_work(dev) &&
> >> +	       single_task_running();
> > signal pending as well?
> 
> Yes.
> 
> >> +}
> >> +
> >>  /* Expects to be always run from workqueue - which acts as
> >>   * read-size critical section for our kind of RCU. */
> >>  static void handle_tx(struct vhost_net *net)
> >>  {
> >>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> >>  	struct vhost_virtqueue *vq = &nvq->vq;
> >> +	unsigned long endtime;
> >>  	unsigned out, in;
> >>  	int head;
> >>  	struct msghdr msg = {
> >> @@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
> >>  			      % UIO_MAXIOV == nvq->done_idx))
> >>  			break;
> >>  
> >> +		endtime  = (local_clock() >> 10) + busyloop_timeout;
> >> +again:
> >>  		head = vhost_get_vq_desc(vq, vq->iov,
> >>  					 ARRAY_SIZE(vq->iov),
> >>  					 &out, &in,
> >> @@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
> >>  			break;
> >>  		/* Nothing new?  Wait for eventfd to tell us they refilled. */
> >>  		if (head == vq->num) {
> >> +			if (tx_can_busy_poll(vq->dev, endtime)) {
> >> +				cpu_relax();
> >> +				goto again;
> >> +			}
> >>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> >>  				vhost_disable_notify(&net->dev, vq);
> >>  				continue;
> >> -- 
> >> 1.8.3.1
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2015-10-23 13:39 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-22  5:27 [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work() Jason Wang
2015-10-22  5:27 ` Jason Wang
2015-10-22  5:27 ` [PATCH net-next RFC 2/2] vhost_net: basic polling support Jason Wang
2015-10-22  5:27   ` Jason Wang
2015-10-22  9:33   ` Michael S. Tsirkin
2015-10-22  9:33     ` Michael S. Tsirkin
2015-10-22 15:46     ` Rick Jones
2015-10-22 15:46     ` Rick Jones
2015-10-22 16:16       ` Michael S. Tsirkin
2015-10-23  7:14         ` Jason Wang
2015-10-23  7:14           ` Jason Wang
2015-10-22 16:16       ` Michael S. Tsirkin
2015-10-23  7:13     ` Jason Wang
2015-10-23  7:13     ` Jason Wang
2015-10-23 13:39       ` Michael S. Tsirkin
2015-10-23 13:39       ` Michael S. Tsirkin
2015-10-22  8:38 ` [PATCH net-next RFC 1/2] vhost: introduce vhost_has_work() Michael S. Tsirkin
2015-10-22  8:38   ` Michael S. Tsirkin
2015-10-23  7:10   ` Jason Wang
2015-10-23  7:10     ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.