linux-kernel.vger.kernel.org archive mirror
* [PATCH] vhost_net: remove tx polling state
@ 2013-04-11  6:50 Jason Wang
  2013-04-11  7:24 ` Michael S. Tsirkin
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Wang @ 2013-04-11  6:50 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel; +Cc: Jason Wang

After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
errors when setting backend), we in fact track the polling state through
poll->wqh, so there's no need to duplicate the work with an extra
vhost_net_poll_state. This patch removes it and makes the code simpler.

This patch also removes all the tx polling start/stop code from the tx path,
according to Michael's suggestion.
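
In effect, poll->wqh now doubles as the state: it is non-NULL exactly while
poll->wait is registered on the backend socket's wait queue. A minimal sketch
of the invariant (simplified from the vhost.c hunk below, not the verbatim
kernel code):

	/* poll->wqh != NULL <=> registered on the wait queue, so a second
	 * start is a no-op and stop()/start() pairs stay balanced. */
	int vhost_poll_start(struct vhost_poll *poll, struct file *file)
	{
		if (poll->wqh)		/* already polling: nothing to do */
			return 0;
		/* file->f_op->poll() registers poll->wait, setting poll->wqh */
		...
	}

	void vhost_poll_stop(struct vhost_poll *poll)
	{
		if (poll->wqh) {
			remove_wait_queue(poll->wqh, &poll->wait);
			poll->wqh = NULL;	/* back to "not polling" */
		}
	}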

Netperf shows almost the same results in stream tests, but shows improvements
in TCP_RR tests (both zerocopy and copy), especially in low-load cases.

Tested between a multiqueue kvm guest and an external host with two directly
connected 82599s.

zerocopy disabled:

sessions| transaction rates          | normalized rates       |
(each column: before/after/+improvement)
1 | 9510.24/11727.29/+23.3%    | 693.54/887.68/+28.0%   |
25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
50| 277634.64/291905.76/+5%    | 3118.36/3230.11/+3.6%  |

zerocopy enabled:

sessions| transaction rates          | normalized rates       |
(each column: before/after/+improvement)
1 | 7318.33/11929.76/+63.0%    | 521.86/843.30/+61.6%   |
25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
50| 272181.02/294347.04/+8.1%  | 3071.56/3257.85/+6.1%  |

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c   |   74 ++++---------------------------------------------
 drivers/vhost/vhost.c |    3 ++
 2 files changed, 9 insertions(+), 68 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ec6fb3f..87c216c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,10 @@ enum {
 	VHOST_NET_VQ_MAX = 2,
 };
 
-enum vhost_net_poll_state {
-	VHOST_NET_POLL_DISABLED = 0,
-	VHOST_NET_POLL_STARTED = 1,
-	VHOST_NET_POLL_STOPPED = 2,
-};
-
 struct vhost_net {
 	struct vhost_dev dev;
 	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
 	struct vhost_poll poll[VHOST_NET_VQ_MAX];
-	/* Tells us whether we are polling a socket for TX.
-	 * We only do this when socket buffer fills up.
-	 * Protected by tx vq lock. */
-	enum vhost_net_poll_state tx_poll_state;
 	/* Number of TX recently submitted.
 	 * Protected by tx vq lock. */
 	unsigned tx_packets;
@@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 	}
 }
 
-/* Caller must have TX VQ lock */
-static void tx_poll_stop(struct vhost_net *net)
-{
-	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
-		return;
-	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
-	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
-}
-
-/* Caller must have TX VQ lock */
-static int tx_poll_start(struct vhost_net *net, struct socket *sock)
-{
-	int ret;
-
-	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
-		return 0;
-	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
-	if (!ret)
-		net->tx_poll_state = VHOST_NET_POLL_STARTED;
-	return ret;
-}
-
 /* In case of DMA done not in order in lower device driver for some reason.
  * upend_idx is used to track end of used idx, done_idx is used to track head
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net)
 		.msg_flags = MSG_DONTWAIT,
 	};
 	size_t len, total_len = 0;
-	int err, wmem;
+	int err;
 	size_t hdr_size;
 	struct socket *sock;
 	struct vhost_ubuf_ref *uninitialized_var(ubufs);
@@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net)
 	if (!sock)
 		return;
 
-	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
-	if (wmem >= sock->sk->sk_sndbuf) {
-		mutex_lock(&vq->mutex);
-		tx_poll_start(net, sock);
-		mutex_unlock(&vq->mutex);
-		return;
-	}
-
 	mutex_lock(&vq->mutex);
 	vhost_disable_notify(&net->dev, vq);
 
-	if (wmem < sock->sk->sk_sndbuf / 2)
-		tx_poll_stop(net);
 	hdr_size = vq->vhost_hlen;
 	zcopy = vq->ubufs;
 
@@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net)
 		if (head == vq->num) {
 			int num_pends;
 
-			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
-			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
-				tx_poll_start(net, sock);
-				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
-				break;
-			}
 			/* If more outstanding DMAs, queue the work.
 			 * Handle upend_idx wrap around
 			 */
 			num_pends = likely(vq->upend_idx >= vq->done_idx) ?
 				    (vq->upend_idx - vq->done_idx) :
 				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
-			if (unlikely(num_pends > VHOST_MAX_PEND)) {
-				tx_poll_start(net, sock);
-				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+			if (unlikely(num_pends > VHOST_MAX_PEND))
 				break;
-			}
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
@@ -364,8 +313,6 @@ static void handle_tx(struct vhost_net *net)
 					UIO_MAXIOV;
 			}
 			vhost_discard_vq_desc(vq, 1);
-			if (err == -EAGAIN || err == -ENOBUFS)
-				tx_poll_start(net, sock);
 			break;
 		}
 		if (err != len)
@@ -628,7 +575,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 
 	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
 	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
-	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
 
 	f->private_data = n;
 
@@ -638,32 +584,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 static void vhost_net_disable_vq(struct vhost_net *n,
 				 struct vhost_virtqueue *vq)
 {
+	struct vhost_poll *poll = n->poll + (vq - n->vqs);
 	if (!vq->private_data)
 		return;
-	if (vq == n->vqs + VHOST_NET_VQ_TX) {
-		tx_poll_stop(n);
-		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
-	} else
-		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+	vhost_poll_stop(poll);
 }
 
 static int vhost_net_enable_vq(struct vhost_net *n,
 				struct vhost_virtqueue *vq)
 {
+	struct vhost_poll *poll = n->poll + (vq - n->vqs);
 	struct socket *sock;
-	int ret;
 
 	sock = rcu_dereference_protected(vq->private_data,
 					 lockdep_is_held(&vq->mutex));
 	if (!sock)
 		return 0;
-	if (vq == n->vqs + VHOST_NET_VQ_TX) {
-		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
-		ret = tx_poll_start(n, sock);
-	} else
-		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
 
-	return ret;
+	return vhost_poll_start(poll, sock->file);
 }
 
 static struct socket *vhost_net_stop_vq(struct vhost_net *n,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9759249..4eecdb8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
 	unsigned long mask;
 	int ret = 0;
 
+	if (poll->wqh)
+		return 0;
+
 	mask = file->f_op->poll(file, &poll->table);
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
-- 
1.7.1



* Re: [PATCH] vhost_net: remove tx polling state
  2013-04-11  6:50 [PATCH] vhost_net: remove tx polling state Jason Wang
@ 2013-04-11  7:24 ` Michael S. Tsirkin
  2013-04-11 20:16   ` David Miller
  0 siblings, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-04-11  7:24 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel, davem

On Thu, Apr 11, 2013 at 02:50:48PM +0800, Jason Wang wrote:
> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
> errors when setting backend), we in fact track the polling state through
> poll->wqh, so there's no need to duplicate the work with an extra
> vhost_net_poll_state. This patch removes it and makes the code simpler.
> 
> This patch also removes all the tx polling start/stop code from the tx path,
> according to Michael's suggestion.
> 
> Netperf shows almost the same results in stream tests, but shows improvements
> in TCP_RR tests (both zerocopy and copy), especially in low-load cases.
> 
> Tested between a multiqueue kvm guest and an external host with two directly
> connected 82599s.
> 
> zerocopy disabled:
> 
> sessions| transaction rates          | normalized rates       |
> (each column: before/after/+improvement)
> 1 | 9510.24/11727.29/+23.3%    | 693.54/887.68/+28.0%   |
> 25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
> 50| 277634.64/291905.76/+5%    | 3118.36/3230.11/+3.6%  |
> 
> zerocopy enabled:
> 
> sessions| transaction rates          | normalized rates       |
> (each column: before/after/+improvement)
> 1 | 7318.33/11929.76/+63.0%    | 521.86/843.30/+61.6%   |
> 25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
> 50| 272181.02/294347.04/+8.1%  | 3071.56/3257.85/+6.1%  |
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Less code and better speed, what's not to like?
Davem, could you pick this up for 3.10 please?

Acked-by: Michael S. Tsirkin <mst@redhat.com>


> ---
>  drivers/vhost/net.c   |   74 ++++---------------------------------------------
>  drivers/vhost/vhost.c |    3 ++
>  2 files changed, 9 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index ec6fb3f..87c216c 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -64,20 +64,10 @@ enum {
>  	VHOST_NET_VQ_MAX = 2,
>  };
>  
> -enum vhost_net_poll_state {
> -	VHOST_NET_POLL_DISABLED = 0,
> -	VHOST_NET_POLL_STARTED = 1,
> -	VHOST_NET_POLL_STOPPED = 2,
> -};
> -
>  struct vhost_net {
>  	struct vhost_dev dev;
>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
> -	/* Tells us whether we are polling a socket for TX.
> -	 * We only do this when socket buffer fills up.
> -	 * Protected by tx vq lock. */
> -	enum vhost_net_poll_state tx_poll_state;
>  	/* Number of TX recently submitted.
>  	 * Protected by tx vq lock. */
>  	unsigned tx_packets;
> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
>  	}
>  }
>  
> -/* Caller must have TX VQ lock */
> -static void tx_poll_stop(struct vhost_net *net)
> -{
> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> -		return;
> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -}
> -
> -/* Caller must have TX VQ lock */
> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
> -{
> -	int ret;
> -
> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> -		return 0;
> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> -	if (!ret)
> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
> -	return ret;
> -}
> -
>  /* In case of DMA done not in order in lower device driver for some reason.
>   * upend_idx is used to track end of used idx, done_idx is used to track head
>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
> @@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net)
>  		.msg_flags = MSG_DONTWAIT,
>  	};
>  	size_t len, total_len = 0;
> -	int err, wmem;
> +	int err;
>  	size_t hdr_size;
>  	struct socket *sock;
>  	struct vhost_ubuf_ref *uninitialized_var(ubufs);
> @@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net)
>  	if (!sock)
>  		return;
>  
> -	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> -	if (wmem >= sock->sk->sk_sndbuf) {
> -		mutex_lock(&vq->mutex);
> -		tx_poll_start(net, sock);
> -		mutex_unlock(&vq->mutex);
> -		return;
> -	}
> -
>  	mutex_lock(&vq->mutex);
>  	vhost_disable_notify(&net->dev, vq);
>  
> -	if (wmem < sock->sk->sk_sndbuf / 2)
> -		tx_poll_stop(net);
>  	hdr_size = vq->vhost_hlen;
>  	zcopy = vq->ubufs;
>  
> @@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net)
>  		if (head == vq->num) {
>  			int num_pends;
>  
> -			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> -			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
> -				tx_poll_start(net, sock);
> -				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
> -				break;
> -			}
>  			/* If more outstanding DMAs, queue the work.
>  			 * Handle upend_idx wrap around
>  			 */
>  			num_pends = likely(vq->upend_idx >= vq->done_idx) ?
>  				    (vq->upend_idx - vq->done_idx) :
>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
> -			if (unlikely(num_pends > VHOST_MAX_PEND)) {
> -				tx_poll_start(net, sock);
> -				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
> +			if (unlikely(num_pends > VHOST_MAX_PEND))
>  				break;
> -			}
>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
>  				vhost_disable_notify(&net->dev, vq);
>  				continue;
> @@ -364,8 +313,6 @@ static void handle_tx(struct vhost_net *net)
>  					UIO_MAXIOV;
>  			}
>  			vhost_discard_vq_desc(vq, 1);
> -			if (err == -EAGAIN || err == -ENOBUFS)
> -				tx_poll_start(net, sock);
>  			break;
>  		}
>  		if (err != len)
> @@ -628,7 +575,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  
>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>  
>  	f->private_data = n;
>  
> @@ -638,32 +584,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  static void vhost_net_disable_vq(struct vhost_net *n,
>  				 struct vhost_virtqueue *vq)
>  {
> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>  	if (!vq->private_data)
>  		return;
> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> -		tx_poll_stop(n);
> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> -	} else
> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
> +	vhost_poll_stop(poll);
>  }
>  
>  static int vhost_net_enable_vq(struct vhost_net *n,
>  				struct vhost_virtqueue *vq)
>  {
> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>  	struct socket *sock;
> -	int ret;
>  
>  	sock = rcu_dereference_protected(vq->private_data,
>  					 lockdep_is_held(&vq->mutex));
>  	if (!sock)
>  		return 0;
> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -		ret = tx_poll_start(n, sock);
> -	} else
> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
>  
> -	return ret;
> +	return vhost_poll_start(poll, sock->file);
>  }
>  
>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 9759249..4eecdb8 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
>  	unsigned long mask;
>  	int ret = 0;
>  
> +	if (poll->wqh)
> +		return 0;
> +
>  	mask = file->f_op->poll(file, &poll->table);
>  	if (mask)
>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
> -- 
> 1.7.1


* Re: [PATCH] vhost_net: remove tx polling state
  2013-04-11  7:24 ` Michael S. Tsirkin
@ 2013-04-11 20:16   ` David Miller
  0 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2013-04-11 20:16 UTC (permalink / raw)
  To: mst; +Cc: jasowang, kvm, virtualization, netdev, linux-kernel

From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 11 Apr 2013 10:24:30 +0300

> On Thu, Apr 11, 2013 at 02:50:48PM +0800, Jason Wang wrote:
>> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
>> errors when setting backend), we in fact track the polling state through
>> poll->wqh, so there's no need to duplicate the work with an extra
>> vhost_net_poll_state. This patch removes it and makes the code simpler.
>> 
>> This patch also removes all the tx polling start/stop code from the tx path,
>> according to Michael's suggestion.
>> 
>> Netperf shows almost the same results in stream tests, but shows improvements
>> in TCP_RR tests (both zerocopy and copy), especially in low-load cases.
>> 
>> Tested between a multiqueue kvm guest and an external host with two directly
>> connected 82599s.
 ...
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> 
> Less code and better speed, what's not to like.
> Davem, could you pick this up for 3.10 please?
> 
> Acked-by: Michael S. Tsirkin <mst@redhat.com>

Applied to net-next, thanks everyone.


* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-07  4:31 Jason Wang
  2013-03-07 21:25 ` David Miller
  2013-03-10 16:50 ` Michael S. Tsirkin
@ 2013-03-11  8:47 ` Michael S. Tsirkin
  2 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-03-11  8:47 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
> errors when setting backend), we in fact track the polling state through
> poll->wqh, so there's no need to duplicate the work with an extra
> vhost_net_poll_state. This patch removes it and makes the code simpler.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

OK, NACK, as Jason said he wants to rework this patch.

> ---
>  drivers/vhost/net.c   |   60 ++++++++----------------------------------------
>  drivers/vhost/vhost.c |    3 ++
>  2 files changed, 13 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 959b1cd..d1a03dd 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -64,20 +64,10 @@ enum {
>  	VHOST_NET_VQ_MAX = 2,
>  };
>  
> -enum vhost_net_poll_state {
> -	VHOST_NET_POLL_DISABLED = 0,
> -	VHOST_NET_POLL_STARTED = 1,
> -	VHOST_NET_POLL_STOPPED = 2,
> -};
> -
>  struct vhost_net {
>  	struct vhost_dev dev;
>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
> -	/* Tells us whether we are polling a socket for TX.
> -	 * We only do this when socket buffer fills up.
> -	 * Protected by tx vq lock. */
> -	enum vhost_net_poll_state tx_poll_state;
>  	/* Number of TX recently submitted.
>  	 * Protected by tx vq lock. */
>  	unsigned tx_packets;
> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
>  	}
>  }
>  
> -/* Caller must have TX VQ lock */
> -static void tx_poll_stop(struct vhost_net *net)
> -{
> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> -		return;
> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -}
> -
> -/* Caller must have TX VQ lock */
> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
> -{
> -	int ret;
> -
> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> -		return 0;
> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> -	if (!ret)
> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
> -	return ret;
> -}
> -
>  /* In case of DMA done not in order in lower device driver for some reason.
>   * upend_idx is used to track end of used idx, done_idx is used to track head
>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
> @@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>  static void handle_tx(struct vhost_net *net)
>  {
>  	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
> +	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>  	unsigned out, in, s;
>  	int head;
>  	struct msghdr msg = {
> @@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>  	if (wmem >= sock->sk->sk_sndbuf) {
>  		mutex_lock(&vq->mutex);
> -		tx_poll_start(net, sock);
> +		vhost_poll_start(poll, sock->file);
>  		mutex_unlock(&vq->mutex);
>  		return;
>  	}
> @@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
>  	vhost_disable_notify(&net->dev, vq);
>  
>  	if (wmem < sock->sk->sk_sndbuf / 2)
> -		tx_poll_stop(net);
> +		vhost_poll_stop(poll);
>  	hdr_size = vq->vhost_hlen;
>  	zcopy = vq->ubufs;
>  
> @@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
>  
>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
> -				tx_poll_start(net, sock);
> +				vhost_poll_start(poll, sock->file);
>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>  				break;
>  			}
> @@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
>  				    (vq->upend_idx - vq->done_idx) :
>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
> -				tx_poll_start(net, sock);
> +				vhost_poll_start(poll, sock->file);
>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>  				break;
>  			}
> @@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
>  			}
>  			vhost_discard_vq_desc(vq, 1);
>  			if (err == -EAGAIN || err == -ENOBUFS)
> -				tx_poll_start(net, sock);
> +				vhost_poll_start(poll, sock->file);
>  			break;
>  		}
>  		if (err != len)
> @@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  
>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>  
>  	f->private_data = n;
>  
> @@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  static void vhost_net_disable_vq(struct vhost_net *n,
>  				 struct vhost_virtqueue *vq)
>  {
> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>  	if (!vq->private_data)
>  		return;
> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> -		tx_poll_stop(n);
> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> -	} else
> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
> +	vhost_poll_stop(poll);
>  }
>  
>  static int vhost_net_enable_vq(struct vhost_net *n,
>  				struct vhost_virtqueue *vq)
>  {
> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>  	struct socket *sock;
> -	int ret;
>  
>  	sock = rcu_dereference_protected(vq->private_data,
>  					 lockdep_is_held(&vq->mutex));
>  	if (!sock)
>  		return 0;
> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -		ret = tx_poll_start(n, sock);
> -	} else
> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
>  
> -	return ret;
> +	return vhost_poll_start(poll, sock->file);
>  }
>  
>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 9759249..4eecdb8 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
>  	unsigned long mask;
>  	int ret = 0;
>  
> +	if (poll->wqh)
> +		return 0;
> +
>  	mask = file->f_op->poll(file, &poll->table);
>  	if (mask)
>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
> -- 
> 1.7.1


* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-11  8:29     ` Michael S. Tsirkin
@ 2013-03-11  8:45       ` Jason Wang
  0 siblings, 0 replies; 12+ messages in thread
From: Jason Wang @ 2013-03-11  8:45 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel

On 03/11/2013 04:29 PM, Michael S. Tsirkin wrote:
> On Mon, Mar 11, 2013 at 03:09:10PM +0800, Jason Wang wrote:
>> On 03/11/2013 12:50 AM, Michael S. Tsirkin wrote:
>>> On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
>>>> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
>>>> errors when setting backend), we in fact track the polling state through
>>>> poll->wqh, so there's no need to duplicate the work with an extra
>>>> vhost_net_poll_state. This patch removes it and makes the code simpler.
>>>>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> I'd prefer a more radical approach, since I think it can be even
>>> simpler: the tap and macvtap backends both only send events when the
>>> tx queue overruns, which should almost never happen.
>>>
>>> So let's just start polling when the VQ is enabled and drop all
>>> poll_start/stop calls on the data path.
>> I think then it may damage the performance, at least for tx. We
>> conditionally enabled tx polling in the past to reduce the
>> unnecessary wakeups of the vhost thread when zerocopy or sndbuf is used.
>> If we don't stop the polling, after each packet is transmitted,
>> tap/macvtap will try to wake up the vhost thread.
> It won't actually.
> static void macvtap_sock_write_space(struct sock *sk)
> {
>         wait_queue_head_t *wqueue;
>
>         if (!sock_writeable(sk) ||
>             !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
>                 return;
>
>         wqueue = sk_sleep(sk);
>         if (wqueue && waitqueue_active(wqueue))
>                 wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
> }
>
> As long as SOCK_ASYNC_NOSPACE is not set, there's no wakeup.
>

Ok, will send a V2 which removes all the polling start/stop in the data path.

Thanks


* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-11  7:09   ` Jason Wang
  2013-03-11  7:33     ` Jason Wang
@ 2013-03-11  8:29     ` Michael S. Tsirkin
  2013-03-11  8:45       ` Jason Wang
  1 sibling, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-03-11  8:29 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Mon, Mar 11, 2013 at 03:09:10PM +0800, Jason Wang wrote:
> On 03/11/2013 12:50 AM, Michael S. Tsirkin wrote:
> > On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
> >> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
> >> errors when setting backend), we in fact track the polling state through
> >> poll->wqh, so there's no need to duplicate the work with an extra
> >> vhost_net_poll_state. This patch removes it and makes the code simpler.
> >>
> >> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > I'd prefer a more radical approach, since I think it can be even
> > simpler: the tap and macvtap backends both only send events when the
> > tx queue overruns, which should almost never happen.
> >
> > So let's just start polling when the VQ is enabled and drop all
> > poll_start/stop calls on the data path.
> 
> I think then it may damage the performance, at least for tx. We
> conditionally enabled tx polling in the past to reduce the
> unnecessary wakeups of the vhost thread when zerocopy or sndbuf is used.
> If we don't stop the polling, after each packet is transmitted,
> tap/macvtap will try to wake up the vhost thread.

It won't actually.
static void macvtap_sock_write_space(struct sock *sk)
{
        wait_queue_head_t *wqueue;

        if (!sock_writeable(sk) ||
            !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
                return;

        wqueue = sk_sleep(sk);
        if (wqueue && waitqueue_active(wqueue))
                wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
}

As long as SOCK_ASYNC_NOSPACE is not set, there's no wakeup.
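
To make the handshake concrete: the waiter arms a single wakeup by setting
SOCK_ASYNC_NOSPACE when it runs out of room, and the write-space hook above
fires only if the bit was set, clearing it again. A minimal sender-side
sketch of that protocol (hypothetical function, not the actual vhost code):

	static int try_send(struct socket *sock, struct msghdr *msg, size_t len)
	{
		int err = sock->ops->sendmsg(NULL, sock, msg, len);

		if (err == -EAGAIN || err == -ENOBUFS)
			/* no room: arm exactly one write-space wakeup */
			set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
		return err;
	}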


> Actually, I'm considering the reverse direction. Looks like we can
> disable rx polling, like what we do for tx in handle_tx(), to reduce
> the unnecessary wakeups.
> > The optimization was written for the packet socket backend, but I know
> > of no one who cares that much about that one's performance.
> > It needs a bit of perf testing to make sure I didn't miss anything,
> > but it's not 3.9 material anyway, so there's no rush.
> >
> >> ---
> >>  drivers/vhost/net.c   |   60 ++++++++----------------------------------------
> >>  drivers/vhost/vhost.c |    3 ++
> >>  2 files changed, 13 insertions(+), 50 deletions(-)
> >>
> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >> index 959b1cd..d1a03dd 100644
> >> --- a/drivers/vhost/net.c
> >> +++ b/drivers/vhost/net.c
> >> @@ -64,20 +64,10 @@ enum {
> >>  	VHOST_NET_VQ_MAX = 2,
> >>  };
> >>  
> >> -enum vhost_net_poll_state {
> >> -	VHOST_NET_POLL_DISABLED = 0,
> >> -	VHOST_NET_POLL_STARTED = 1,
> >> -	VHOST_NET_POLL_STOPPED = 2,
> >> -};
> >> -
> >>  struct vhost_net {
> >>  	struct vhost_dev dev;
> >>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
> >>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
> >> -	/* Tells us whether we are polling a socket for TX.
> >> -	 * We only do this when socket buffer fills up.
> >> -	 * Protected by tx vq lock. */
> >> -	enum vhost_net_poll_state tx_poll_state;
> >>  	/* Number of TX recently submitted.
> >>  	 * Protected by tx vq lock. */
> >>  	unsigned tx_packets;
> >> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
> >>  	}
> >>  }
> >>  
> >> -/* Caller must have TX VQ lock */
> >> -static void tx_poll_stop(struct vhost_net *net)
> >> -{
> >> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> >> -		return;
> >> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> >> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> >> -}
> >> -
> >> -/* Caller must have TX VQ lock */
> >> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
> >> -{
> >> -	int ret;
> >> -
> >> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> >> -		return 0;
> >> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> >> -	if (!ret)
> >> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
> >> -	return ret;
> >> -}
> >> -
> >>  /* In case of DMA done not in order in lower device driver for some reason.
> >>   * upend_idx is used to track end of used idx, done_idx is used to track head
> >>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
> >> @@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
> >>  static void handle_tx(struct vhost_net *net)
> >>  {
> >>  	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
> >> +	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
> >>  	unsigned out, in, s;
> >>  	int head;
> >>  	struct msghdr msg = {
> >> @@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
> >>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> >>  	if (wmem >= sock->sk->sk_sndbuf) {
> >>  		mutex_lock(&vq->mutex);
> >> -		tx_poll_start(net, sock);
> >> +		vhost_poll_start(poll, sock->file);
> >>  		mutex_unlock(&vq->mutex);
> >>  		return;
> >>  	}
> >> @@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
> >>  	vhost_disable_notify(&net->dev, vq);
> >>  
> >>  	if (wmem < sock->sk->sk_sndbuf / 2)
> >> -		tx_poll_stop(net);
> >> +		vhost_poll_stop(poll);
> >>  	hdr_size = vq->vhost_hlen;
> >>  	zcopy = vq->ubufs;
> >>  
> >> @@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
> >>  
> >>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> >>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
> >> -				tx_poll_start(net, sock);
> >> +				vhost_poll_start(poll, sock->file);
> >>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
> >>  				break;
> >>  			}
> >> @@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
> >>  				    (vq->upend_idx - vq->done_idx) :
> >>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
> >>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
> >> -				tx_poll_start(net, sock);
> >> +				vhost_poll_start(poll, sock->file);
> >>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
> >>  				break;
> >>  			}
> >> @@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
> >>  			}
> >>  			vhost_discard_vq_desc(vq, 1);
> >>  			if (err == -EAGAIN || err == -ENOBUFS)
> >> -				tx_poll_start(net, sock);
> >> +				vhost_poll_start(poll, sock->file);
> >>  			break;
> >>  		}
> >>  		if (err != len)
> >> @@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
> >>  
> >>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
> >>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
> >> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> >>  
> >>  	f->private_data = n;
> >>  
> >> @@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
> >>  static void vhost_net_disable_vq(struct vhost_net *n,
> >>  				 struct vhost_virtqueue *vq)
> >>  {
> >> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
> >>  	if (!vq->private_data)
> >>  		return;
> >> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> >> -		tx_poll_stop(n);
> >> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> >> -	} else
> >> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
> >> +	vhost_poll_stop(poll);
> >>  }
> >>  
> >>  static int vhost_net_enable_vq(struct vhost_net *n,
> >>  				struct vhost_virtqueue *vq)
> >>  {
> >> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
> >>  	struct socket *sock;
> >> -	int ret;
> >>  
> >>  	sock = rcu_dereference_protected(vq->private_data,
> >>  					 lockdep_is_held(&vq->mutex));
> >>  	if (!sock)
> >>  		return 0;
> >> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> >> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
> >> -		ret = tx_poll_start(n, sock);
> >> -	} else
> >> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
> >>  
> >> -	return ret;
> >> +	return vhost_poll_start(poll, sock->file);
> >>  }
> >>  
> >>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
> >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> >> index 9759249..4eecdb8 100644
> >> --- a/drivers/vhost/vhost.c
> >> +++ b/drivers/vhost/vhost.c
> >> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
> >>  	unsigned long mask;
> >>  	int ret = 0;
> >>  
> >> +	if (poll->wqh)
> >> +		return 0;
> >> +
> >>  	mask = file->f_op->poll(file, &poll->table);
> >>  	if (mask)
> >>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
> >> -- 
> >> 1.7.1


* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-11  7:33     ` Jason Wang
@ 2013-03-11  8:26       ` Michael S. Tsirkin
  0 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-03-11  8:26 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Mon, Mar 11, 2013 at 03:33:16PM +0800, Jason Wang wrote:
> On 03/11/2013 03:09 PM, Jason Wang wrote:
> > On 03/11/2013 12:50 AM, Michael S. Tsirkin wrote:
> >> On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
> >>> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
> >>> errors when setting backend), we in fact track the polling state through
> >>> poll->wqh, so there's no need to duplicate the work with an extra
> >>> vhost_net_poll_state. This patch removes it and makes the code simpler.
> >>>
> >>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >> I'd prefer a more radical approach, since I think it can be even
> >> simpler: the tap and macvtap backends both only send events when the
> >> tx queue overruns, which should almost never happen.
> >>
> >> So let's just start polling when the VQ is enabled and drop all
> >> poll_start/stop calls on the data path.
> > I think then it may damage the performance, at least for tx. We
> > conditionally enabled tx polling in the past to reduce the
> > unnecessary wakeups of the vhost thread when zerocopy or sndbuf is used.
> > If we don't stop the polling, after each packet is transmitted,
> > tap/macvtap will try to wake up the vhost thread.
> 
> Or for zerocopy, it looks like we can in fact just depend on
> ubuf->callback instead of tx polling to wake up the tx thread to do tx
> completion, so we can even drop the num_pends and VHOST_MAX_PEND stuff.

I think we still need to limit the number of outstanding buffers, so
VHOST_MAX_PEND needs to stay. I'm not against switching to the callback
for this, but let's make that a separate change.
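
For reference, the outstanding-buffer limit in question is the wrap-aware
count already in handle_tx() (taken from the diff above; upend_idx and
done_idx index a ring of UIO_MAXIOV slots):

	/* DMAs still in flight; the indices wrap at UIO_MAXIOV */
	num_pends = likely(vq->upend_idx >= vq->done_idx) ?
		    (vq->upend_idx - vq->done_idx) :
		    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
	if (unlikely(num_pends > VHOST_MAX_PEND))
		break;	/* stop queueing tx until completions catch up */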

> > Actually, I'm considering the reverse direction. Looks like we can
> > disable rx polling, like what we do for tx in handle_tx(), to reduce
> > the unnecessary wakeups.
> >> The optimization was written for the packet socket backend, but I know
> >> of no one who cares that much about that one's performance.
> >> It needs a bit of perf testing to make sure I didn't miss anything,
> >> but it's not 3.9 material anyway, so there's no rush.
> >>
> >>> ---
> >>>  drivers/vhost/net.c   |   60 ++++++++----------------------------------------
> >>>  drivers/vhost/vhost.c |    3 ++
> >>>  2 files changed, 13 insertions(+), 50 deletions(-)
> >>>
> >>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >>> index 959b1cd..d1a03dd 100644
> >>> --- a/drivers/vhost/net.c
> >>> +++ b/drivers/vhost/net.c
> >>> @@ -64,20 +64,10 @@ enum {
> >>>  	VHOST_NET_VQ_MAX = 2,
> >>>  };
> >>>  
> >>> -enum vhost_net_poll_state {
> >>> -	VHOST_NET_POLL_DISABLED = 0,
> >>> -	VHOST_NET_POLL_STARTED = 1,
> >>> -	VHOST_NET_POLL_STOPPED = 2,
> >>> -};
> >>> -
> >>>  struct vhost_net {
> >>>  	struct vhost_dev dev;
> >>>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
> >>>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
> >>> -	/* Tells us whether we are polling a socket for TX.
> >>> -	 * We only do this when socket buffer fills up.
> >>> -	 * Protected by tx vq lock. */
> >>> -	enum vhost_net_poll_state tx_poll_state;
> >>>  	/* Number of TX recently submitted.
> >>>  	 * Protected by tx vq lock. */
> >>>  	unsigned tx_packets;
> >>> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
> >>>  	}
> >>>  }
> >>>  
> >>> -/* Caller must have TX VQ lock */
> >>> -static void tx_poll_stop(struct vhost_net *net)
> >>> -{
> >>> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> >>> -		return;
> >>> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> >>> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> >>> -}
> >>> -
> >>> -/* Caller must have TX VQ lock */
> >>> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
> >>> -{
> >>> -	int ret;
> >>> -
> >>> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> >>> -		return 0;
> >>> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> >>> -	if (!ret)
> >>> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
> >>> -	return ret;
> >>> -}
> >>> -
> >>>  /* In case of DMA done not in order in lower device driver for some reason.
> >>>   * upend_idx is used to track end of used idx, done_idx is used to track head
> >>>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
> >>> @@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
> >>>  static void handle_tx(struct vhost_net *net)
> >>>  {
> >>>  	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
> >>> +	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
> >>>  	unsigned out, in, s;
> >>>  	int head;
> >>>  	struct msghdr msg = {
> >>> @@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
> >>>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> >>>  	if (wmem >= sock->sk->sk_sndbuf) {
> >>>  		mutex_lock(&vq->mutex);
> >>> -		tx_poll_start(net, sock);
> >>> +		vhost_poll_start(poll, sock->file);
> >>>  		mutex_unlock(&vq->mutex);
> >>>  		return;
> >>>  	}
> >>> @@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
> >>>  	vhost_disable_notify(&net->dev, vq);
> >>>  
> >>>  	if (wmem < sock->sk->sk_sndbuf / 2)
> >>> -		tx_poll_stop(net);
> >>> +		vhost_poll_stop(poll);
> >>>  	hdr_size = vq->vhost_hlen;
> >>>  	zcopy = vq->ubufs;
> >>>  
> >>> @@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
> >>>  
> >>>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> >>>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
> >>> -				tx_poll_start(net, sock);
> >>> +				vhost_poll_start(poll, sock->file);
> >>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
> >>>  				break;
> >>>  			}
> >>> @@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
> >>>  				    (vq->upend_idx - vq->done_idx) :
> >>>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
> >>>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
> >>> -				tx_poll_start(net, sock);
> >>> +				vhost_poll_start(poll, sock->file);
> >>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
> >>>  				break;
> >>>  			}
> >>> @@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
> >>>  			}
> >>>  			vhost_discard_vq_desc(vq, 1);
> >>>  			if (err == -EAGAIN || err == -ENOBUFS)
> >>> -				tx_poll_start(net, sock);
> >>> +				vhost_poll_start(poll, sock->file);
> >>>  			break;
> >>>  		}
> >>>  		if (err != len)
> >>> @@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
> >>>  
> >>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
> >>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
> >>> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> >>>  
> >>>  	f->private_data = n;
> >>>  
> >>> @@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
> >>>  static void vhost_net_disable_vq(struct vhost_net *n,
> >>>  				 struct vhost_virtqueue *vq)
> >>>  {
> >>> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
> >>>  	if (!vq->private_data)
> >>>  		return;
> >>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> >>> -		tx_poll_stop(n);
> >>> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> >>> -	} else
> >>> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
> >>> +	vhost_poll_stop(poll);
> >>>  }
> >>>  
> >>>  static int vhost_net_enable_vq(struct vhost_net *n,
> >>>  				struct vhost_virtqueue *vq)
> >>>  {
> >>> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
> >>>  	struct socket *sock;
> >>> -	int ret;
> >>>  
> >>>  	sock = rcu_dereference_protected(vq->private_data,
> >>>  					 lockdep_is_held(&vq->mutex));
> >>>  	if (!sock)
> >>>  		return 0;
> >>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> >>> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
> >>> -		ret = tx_poll_start(n, sock);
> >>> -	} else
> >>> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
> >>>  
> >>> -	return ret;
> >>> +	return vhost_poll_start(poll, sock->file);
> >>>  }
> >>>  
> >>>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
> >>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> >>> index 9759249..4eecdb8 100644
> >>> --- a/drivers/vhost/vhost.c
> >>> +++ b/drivers/vhost/vhost.c
> >>> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
> >>>  	unsigned long mask;
> >>>  	int ret = 0;
> >>>  
> >>> +	if (poll->wqh)
> >>> +		return 0;
> >>> +
> >>>  	mask = file->f_op->poll(file, &poll->table);
> >>>  	if (mask)
> >>>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
> >>> -- 
> >>> 1.7.1


* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-11  7:09   ` Jason Wang
@ 2013-03-11  7:33     ` Jason Wang
  2013-03-11  8:26       ` Michael S. Tsirkin
  2013-03-11  8:29     ` Michael S. Tsirkin
  1 sibling, 1 reply; 12+ messages in thread
From: Jason Wang @ 2013-03-11  7:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel

On 03/11/2013 03:09 PM, Jason Wang wrote:
> On 03/11/2013 12:50 AM, Michael S. Tsirkin wrote:
>> On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
>>> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
>>> errors when setting backend), we in fact track the polling state through
>>> poll->wqh, so there's no need to duplicate the work with an extra
>>> vhost_net_poll_state. This patch removes it and makes the code simpler.
>>>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> I'd prefer a more radical approach, since I think it can be even
>> simpler: the tap and macvtap backends both only send events when the
>> tx queue overruns, which should almost never happen.
>>
>> So let's just start polling when the VQ is enabled and drop all
>> poll_start/stop calls on the data path.
> I think then it may damage the performance, at least for tx. We
> conditionally enabled tx polling in the past to reduce the
> unnecessary wakeups of the vhost thread when zerocopy or sndbuf is used.
> If we don't stop the polling, after each packet is transmitted,
> tap/macvtap will try to wake up the vhost thread.

Or for zerocopy, it looks like we can in fact just depend on
ubuf->callback instead of tx polling to wake up the tx thread to do tx
completion, so we can even drop the num_pends and VHOST_MAX_PEND stuff.
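
A rough sketch of that idea, assuming the completion path may queue vhost
work directly (hedged; a real change would have to keep the existing
signalling and reference counting the callback does today):

	static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
	{
		struct vhost_ubuf_ref *ubufs = ubuf->ctx;
		struct vhost_virtqueue *vq = ubufs->vq;

		/* ... existing bookkeeping: record the DMA as done ... */

		/* instead of relying on socket polling, re-queue the tx
		 * handler so it can recycle the completed buffers */
		vhost_poll_queue(&vq->poll);
	}
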
> Actually, I'm considering the reverse direction. Looks like we can
> disable rx polling, like what we do for tx in handle_tx(), to reduce
> the unnecessary wakeups.
>> The optimization was written for the packet socket backend, but I know
>> of no one who cares that much about that one's performance.
>> It needs a bit of perf testing to make sure I didn't miss anything,
>> but it's not 3.9 material anyway, so there's no rush.
>>
>>> ---
>>>  drivers/vhost/net.c   |   60 ++++++++----------------------------------------
>>>  drivers/vhost/vhost.c |    3 ++
>>>  2 files changed, 13 insertions(+), 50 deletions(-)
>>>
>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>>> index 959b1cd..d1a03dd 100644
>>> --- a/drivers/vhost/net.c
>>> +++ b/drivers/vhost/net.c
>>> @@ -64,20 +64,10 @@ enum {
>>>  	VHOST_NET_VQ_MAX = 2,
>>>  };
>>>  
>>> -enum vhost_net_poll_state {
>>> -	VHOST_NET_POLL_DISABLED = 0,
>>> -	VHOST_NET_POLL_STARTED = 1,
>>> -	VHOST_NET_POLL_STOPPED = 2,
>>> -};
>>> -
>>>  struct vhost_net {
>>>  	struct vhost_dev dev;
>>>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>>>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
>>> -	/* Tells us whether we are polling a socket for TX.
>>> -	 * We only do this when socket buffer fills up.
>>> -	 * Protected by tx vq lock. */
>>> -	enum vhost_net_poll_state tx_poll_state;
>>>  	/* Number of TX recently submitted.
>>>  	 * Protected by tx vq lock. */
>>>  	unsigned tx_packets;
>>> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
>>>  	}
>>>  }
>>>  
>>> -/* Caller must have TX VQ lock */
>>> -static void tx_poll_stop(struct vhost_net *net)
>>> -{
>>> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>>> -		return;
>>> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>>> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>>> -}
>>> -
>>> -/* Caller must have TX VQ lock */
>>> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
>>> -{
>>> -	int ret;
>>> -
>>> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>>> -		return 0;
>>> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>>> -	if (!ret)
>>> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
>>> -	return ret;
>>> -}
>>> -
>>>  /* In case of DMA done not in order in lower device driver for some reason.
>>>   * upend_idx is used to track end of used idx, done_idx is used to track head
>>>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
>>> @@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>>>  static void handle_tx(struct vhost_net *net)
>>>  {
>>>  	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
>>> +	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>>>  	unsigned out, in, s;
>>>  	int head;
>>>  	struct msghdr msg = {
>>> @@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
>>>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>>  	if (wmem >= sock->sk->sk_sndbuf) {
>>>  		mutex_lock(&vq->mutex);
>>> -		tx_poll_start(net, sock);
>>> +		vhost_poll_start(poll, sock->file);
>>>  		mutex_unlock(&vq->mutex);
>>>  		return;
>>>  	}
>>> @@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
>>>  	vhost_disable_notify(&net->dev, vq);
>>>  
>>>  	if (wmem < sock->sk->sk_sndbuf / 2)
>>> -		tx_poll_stop(net);
>>> +		vhost_poll_stop(poll);
>>>  	hdr_size = vq->vhost_hlen;
>>>  	zcopy = vq->ubufs;
>>>  
>>> @@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
>>>  
>>>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
>>> -				tx_poll_start(net, sock);
>>> +				vhost_poll_start(poll, sock->file);
>>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>>>  				break;
>>>  			}
>>> @@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
>>>  				    (vq->upend_idx - vq->done_idx) :
>>>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
>>>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
>>> -				tx_poll_start(net, sock);
>>> +				vhost_poll_start(poll, sock->file);
>>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>>>  				break;
>>>  			}
>>> @@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
>>>  			}
>>>  			vhost_discard_vq_desc(vq, 1);
>>>  			if (err == -EAGAIN || err == -ENOBUFS)
>>> -				tx_poll_start(net, sock);
>>> +				vhost_poll_start(poll, sock->file);
>>>  			break;
>>>  		}
>>>  		if (err != len)
>>> @@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>>>  
>>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
>>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
>>> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>>>  
>>>  	f->private_data = n;
>>>  
>>> @@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>>>  static void vhost_net_disable_vq(struct vhost_net *n,
>>>  				 struct vhost_virtqueue *vq)
>>>  {
>>> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>>>  	if (!vq->private_data)
>>>  		return;
>>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
>>> -		tx_poll_stop(n);
>>> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>>> -	} else
>>> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
>>> +	vhost_poll_stop(poll);
>>>  }
>>>  
>>>  static int vhost_net_enable_vq(struct vhost_net *n,
>>>  				struct vhost_virtqueue *vq)
>>>  {
>>> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>>>  	struct socket *sock;
>>> -	int ret;
>>>  
>>>  	sock = rcu_dereference_protected(vq->private_data,
>>>  					 lockdep_is_held(&vq->mutex));
>>>  	if (!sock)
>>>  		return 0;
>>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
>>> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
>>> -		ret = tx_poll_start(n, sock);
>>> -	} else
>>> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
>>>  
>>> -	return ret;
>>> +	return vhost_poll_start(poll, sock->file);
>>>  }
>>>  
>>>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>> index 9759249..4eecdb8 100644
>>> --- a/drivers/vhost/vhost.c
>>> +++ b/drivers/vhost/vhost.c
>>> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
>>>  	unsigned long mask;
>>>  	int ret = 0;
>>>  
>>> +	if (poll->wqh)
>>> +		return 0;
>>> +
>>>  	mask = file->f_op->poll(file, &poll->table);
>>>  	if (mask)
>>>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
>>> -- 
>>> 1.7.1



* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-10 16:50 ` Michael S. Tsirkin
@ 2013-03-11  7:09   ` Jason Wang
  2013-03-11  7:33     ` Jason Wang
  2013-03-11  8:29     ` Michael S. Tsirkin
  0 siblings, 2 replies; 12+ messages in thread
From: Jason Wang @ 2013-03-11  7:09 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel

On 03/11/2013 12:50 AM, Michael S. Tsirkin wrote:
> On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
>> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
>> errors when setting backend), we in fact track the polling state through
>> poll->wqh, so there's no need to duplicate the work with an extra
>> vhost_net_poll_state. This patch removes it and makes the code simpler.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> I'd prefer a more radical approach, since I think it can be even
> simpler: the tap and macvtap backends both only send events when the
> tx queue overruns, which should almost never happen.
>
> So let's just start polling when the VQ is enabled and drop all
> poll_start/stop calls on the data path.

I think then it may damage the performance, at least for tx. We
conditionally enabled tx polling in the past to reduce the
unnecessary wakeups of the vhost thread when zerocopy or sndbuf is used.
If we don't stop the polling, after each packet is transmitted,
tap/macvtap will try to wake up the vhost thread.

Actually, I'm considering the reverse direction. Looks like we can
disable rx polling, like what we do for tx in handle_tx(), to reduce
the unnecessary wakeups.
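
A hedged sketch of that direction in handle_rx() (hypothetical: it mirrors
the tx side, and a real patch would need care about when to restart polling
so no rx wakeup is lost):

	static void handle_rx(struct vhost_net *net)
	{
		struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];

		mutex_lock(&vq->mutex);
		vhost_disable_notify(&net->dev, vq);
		/* stop socket polling while we actively drain the socket,
		 * so each arriving packet does not wake the vhost thread */
		vhost_net_disable_vq(net, vq);

		/* ... existing receive loop ... */

		/* restart polling before dropping the vq lock */
		vhost_net_enable_vq(net, vq);
		mutex_unlock(&vq->mutex);
	}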
> The optimization was written for the packet socket backend, but I know
> of no one who cares that much about that one's performance.
> It needs a bit of perf testing to make sure I didn't miss anything,
> but it's not 3.9 material anyway, so there's no rush.
>
>> ---
>>  drivers/vhost/net.c   |   60 ++++++++----------------------------------------
>>  drivers/vhost/vhost.c |    3 ++
>>  2 files changed, 13 insertions(+), 50 deletions(-)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index 959b1cd..d1a03dd 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -64,20 +64,10 @@ enum {
>>  	VHOST_NET_VQ_MAX = 2,
>>  };
>>  
>> -enum vhost_net_poll_state {
>> -	VHOST_NET_POLL_DISABLED = 0,
>> -	VHOST_NET_POLL_STARTED = 1,
>> -	VHOST_NET_POLL_STOPPED = 2,
>> -};
>> -
>>  struct vhost_net {
>>  	struct vhost_dev dev;
>>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
>> -	/* Tells us whether we are polling a socket for TX.
>> -	 * We only do this when socket buffer fills up.
>> -	 * Protected by tx vq lock. */
>> -	enum vhost_net_poll_state tx_poll_state;
>>  	/* Number of TX recently submitted.
>>  	 * Protected by tx vq lock. */
>>  	unsigned tx_packets;
>> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
>>  	}
>>  }
>>  
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_stop(struct vhost_net *net)
>> -{
>> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>> -		return;
>> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>> -}
>> -
>> -/* Caller must have TX VQ lock */
>> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
>> -{
>> -	int ret;
>> -
>> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>> -		return 0;
>> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>> -	if (!ret)
>> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
>> -	return ret;
>> -}
>> -
>>  /* In case of DMA done not in order in lower device driver for some reason.
>>   * upend_idx is used to track end of used idx, done_idx is used to track head
>>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
>> @@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>>  static void handle_tx(struct vhost_net *net)
>>  {
>>  	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
>> +	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>>  	unsigned out, in, s;
>>  	int head;
>>  	struct msghdr msg = {
>> @@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
>>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>  	if (wmem >= sock->sk->sk_sndbuf) {
>>  		mutex_lock(&vq->mutex);
>> -		tx_poll_start(net, sock);
>> +		vhost_poll_start(poll, sock->file);
>>  		mutex_unlock(&vq->mutex);
>>  		return;
>>  	}
>> @@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
>>  	vhost_disable_notify(&net->dev, vq);
>>  
>>  	if (wmem < sock->sk->sk_sndbuf / 2)
>> -		tx_poll_stop(net);
>> +		vhost_poll_stop(poll);
>>  	hdr_size = vq->vhost_hlen;
>>  	zcopy = vq->ubufs;
>>  
>> @@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
>>  
>>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
>> -				tx_poll_start(net, sock);
>> +				vhost_poll_start(poll, sock->file);
>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>>  				break;
>>  			}
>> @@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
>>  				    (vq->upend_idx - vq->done_idx) :
>>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
>>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
>> -				tx_poll_start(net, sock);
>> +				vhost_poll_start(poll, sock->file);
>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>>  				break;
>>  			}
>> @@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
>>  			}
>>  			vhost_discard_vq_desc(vq, 1);
>>  			if (err == -EAGAIN || err == -ENOBUFS)
>> -				tx_poll_start(net, sock);
>> +				vhost_poll_start(poll, sock->file);
>>  			break;
>>  		}
>>  		if (err != len)
>> @@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>>  
>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
>> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>>  
>>  	f->private_data = n;
>>  
>> @@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>>  static void vhost_net_disable_vq(struct vhost_net *n,
>>  				 struct vhost_virtqueue *vq)
>>  {
>> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>>  	if (!vq->private_data)
>>  		return;
>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
>> -		tx_poll_stop(n);
>> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>> -	} else
>> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
>> +	vhost_poll_stop(poll);
>>  }
>>  
>>  static int vhost_net_enable_vq(struct vhost_net *n,
>>  				struct vhost_virtqueue *vq)
>>  {
>> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>>  	struct socket *sock;
>> -	int ret;
>>  
>>  	sock = rcu_dereference_protected(vq->private_data,
>>  					 lockdep_is_held(&vq->mutex));
>>  	if (!sock)
>>  		return 0;
>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
>> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
>> -		ret = tx_poll_start(n, sock);
>> -	} else
>> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
>>  
>> -	return ret;
>> +	return vhost_poll_start(poll, sock->file);
>>  }
>>  
>>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 9759249..4eecdb8 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
>>  	unsigned long mask;
>>  	int ret = 0;
>>  
>> +	if (poll->wqh)
>> +		return 0;
>> +
>>  	mask = file->f_op->poll(file, &poll->table);
>>  	if (mask)
>>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
>> -- 
>> 1.7.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-07  4:31 Jason Wang
  2013-03-07 21:25 ` David Miller
@ 2013-03-10 16:50 ` Michael S. Tsirkin
  2013-03-11  7:09   ` Jason Wang
  2013-03-11  8:47 ` Michael S. Tsirkin
  2 siblings, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-03-10 16:50 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel

On Thu, Mar 07, 2013 at 12:31:56PM +0800, Jason Wang wrote:
> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
> errors when setting backend), we in fact track the polling state through
> poll->wqh, so there's no need to duplicate the work with an extra
> vhost_net_polling_state. So this patch removes it and makes the code simpler.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

I'd prefer a more radical approach, since I think it can be even
simpler: the tap and macvtap backends both only send events when the
tx queue overruns, which should almost never happen.

So let's just start polling when the VQ is enabled and drop all
poll_start/stop calls on the data path.

The optimization was written for the packet socket backend, but I know
of no one who cares that much about the performance of that one.
It needs a bit of perf testing to make sure I didn't miss anything,
but it's not 3.9 material anyway so there's no rush.
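
In code terms the data path would then shrink to something like this
(a sketch only, assuming polling is armed once in vhost_net_enable_vq()
and left alone afterwards):

	/* error path in handle_tx(): no re-arming needed, the poll
	 * stays active for the lifetime of the backend */
	vhost_discard_vq_desc(vq, 1);
	if (err == -EAGAIN || err == -ENOBUFS)
		break;	/* a POLLOUT wakeup will restart us */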

> ---
>  drivers/vhost/net.c   |   60 ++++++++----------------------------------------
>  drivers/vhost/vhost.c |    3 ++
>  2 files changed, 13 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 959b1cd..d1a03dd 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -64,20 +64,10 @@ enum {
>  	VHOST_NET_VQ_MAX = 2,
>  };
>  
> -enum vhost_net_poll_state {
> -	VHOST_NET_POLL_DISABLED = 0,
> -	VHOST_NET_POLL_STARTED = 1,
> -	VHOST_NET_POLL_STOPPED = 2,
> -};
> -
>  struct vhost_net {
>  	struct vhost_dev dev;
>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
> -	/* Tells us whether we are polling a socket for TX.
> -	 * We only do this when socket buffer fills up.
> -	 * Protected by tx vq lock. */
> -	enum vhost_net_poll_state tx_poll_state;
>  	/* Number of TX recently submitted.
>  	 * Protected by tx vq lock. */
>  	unsigned tx_packets;
> @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
>  	}
>  }
>  
> -/* Caller must have TX VQ lock */
> -static void tx_poll_stop(struct vhost_net *net)
> -{
> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> -		return;
> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -}
> -
> -/* Caller must have TX VQ lock */
> -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
> -{
> -	int ret;
> -
> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> -		return 0;
> -	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> -	if (!ret)
> -		net->tx_poll_state = VHOST_NET_POLL_STARTED;
> -	return ret;
> -}
> -
>  /* In case of DMA done not in order in lower device driver for some reason.
>   * upend_idx is used to track end of used idx, done_idx is used to track head
>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
> @@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>  static void handle_tx(struct vhost_net *net)
>  {
>  	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
> +	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>  	unsigned out, in, s;
>  	int head;
>  	struct msghdr msg = {
> @@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>  	if (wmem >= sock->sk->sk_sndbuf) {
>  		mutex_lock(&vq->mutex);
> -		tx_poll_start(net, sock);
> +		vhost_poll_start(poll, sock->file);
>  		mutex_unlock(&vq->mutex);
>  		return;
>  	}
> @@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
>  	vhost_disable_notify(&net->dev, vq);
>  
>  	if (wmem < sock->sk->sk_sndbuf / 2)
> -		tx_poll_stop(net);
> +		vhost_poll_stop(poll);
>  	hdr_size = vq->vhost_hlen;
>  	zcopy = vq->ubufs;
>  
> @@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
>  
>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
> -				tx_poll_start(net, sock);
> +				vhost_poll_start(poll, sock->file);
>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>  				break;
>  			}
> @@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
>  				    (vq->upend_idx - vq->done_idx) :
>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
> -				tx_poll_start(net, sock);
> +				vhost_poll_start(poll, sock->file);
>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>  				break;
>  			}
> @@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
>  			}
>  			vhost_discard_vq_desc(vq, 1);
>  			if (err == -EAGAIN || err == -ENOBUFS)
> -				tx_poll_start(net, sock);
> +				vhost_poll_start(poll, sock->file);
>  			break;
>  		}
>  		if (err != len)
> @@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  
>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>  
>  	f->private_data = n;
>  
> @@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  static void vhost_net_disable_vq(struct vhost_net *n,
>  				 struct vhost_virtqueue *vq)
>  {
> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>  	if (!vq->private_data)
>  		return;
> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> -		tx_poll_stop(n);
> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
> -	} else
> -		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
> +	vhost_poll_stop(poll);
>  }
>  
>  static int vhost_net_enable_vq(struct vhost_net *n,
>  				struct vhost_virtqueue *vq)
>  {
> +	struct vhost_poll *poll = n->poll + (vq - n->vqs);
>  	struct socket *sock;
> -	int ret;
>  
>  	sock = rcu_dereference_protected(vq->private_data,
>  					 lockdep_is_held(&vq->mutex));
>  	if (!sock)
>  		return 0;
> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -		ret = tx_poll_start(n, sock);
> -	} else
> -		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
>  
> -	return ret;
> +	return vhost_poll_start(poll, sock->file);
>  }
>  
>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 9759249..4eecdb8 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
>  	unsigned long mask;
>  	int ret = 0;
>  
> +	if (poll->wqh)
> +		return 0;
> +
>  	mask = file->f_op->poll(file, &poll->table);
>  	if (mask)
>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
> -- 
> 1.7.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] vhost_net: remove tx polling state
  2013-03-07  4:31 Jason Wang
@ 2013-03-07 21:25 ` David Miller
  2013-03-10 16:50 ` Michael S. Tsirkin
  2013-03-11  8:47 ` Michael S. Tsirkin
  2 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2013-03-07 21:25 UTC (permalink / raw)
  To: jasowang; +Cc: mst, kvm, virtualization, netdev, linux-kernel

From: Jason Wang <jasowang@redhat.com>
Date: Thu,  7 Mar 2013 12:31:56 +0800

> After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
> errors when setting backend), we in fact track the polling state through
> poll->wqh, so there's no need to duplicate the work with an extra
> vhost_net_polling_state. So this patch removes it and makes the code simpler.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Can I get an ACK or two from some VHOST folks?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] vhost_net: remove tx polling state
@ 2013-03-07  4:31 Jason Wang
  2013-03-07 21:25 ` David Miller
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Jason Wang @ 2013-03-07  4:31 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel; +Cc: Jason Wang

After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
errors when setting backend), we in fact track the polling state through
poll->wqh, so there's no need to duplicate the work with an extra
vhost_net_polling_state. So this patch removes it and makes the code simpler.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c   |   60 ++++++++----------------------------------------
 drivers/vhost/vhost.c |    3 ++
 2 files changed, 13 insertions(+), 50 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 959b1cd..d1a03dd 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,10 @@ enum {
 	VHOST_NET_VQ_MAX = 2,
 };
 
-enum vhost_net_poll_state {
-	VHOST_NET_POLL_DISABLED = 0,
-	VHOST_NET_POLL_STARTED = 1,
-	VHOST_NET_POLL_STOPPED = 2,
-};
-
 struct vhost_net {
 	struct vhost_dev dev;
 	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
 	struct vhost_poll poll[VHOST_NET_VQ_MAX];
-	/* Tells us whether we are polling a socket for TX.
-	 * We only do this when socket buffer fills up.
-	 * Protected by tx vq lock. */
-	enum vhost_net_poll_state tx_poll_state;
 	/* Number of TX recently submitted.
 	 * Protected by tx vq lock. */
 	unsigned tx_packets;
@@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 	}
 }
 
-/* Caller must have TX VQ lock */
-static void tx_poll_stop(struct vhost_net *net)
-{
-	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
-		return;
-	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
-	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
-}
-
-/* Caller must have TX VQ lock */
-static int tx_poll_start(struct vhost_net *net, struct socket *sock)
-{
-	int ret;
-
-	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
-		return 0;
-	ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
-	if (!ret)
-		net->tx_poll_state = VHOST_NET_POLL_STARTED;
-	return ret;
-}
-
 /* In case of DMA done not in order in lower device driver for some reason.
  * upend_idx is used to track end of used idx, done_idx is used to track head
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -231,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+	struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
 	unsigned out, in, s;
 	int head;
 	struct msghdr msg = {
@@ -256,7 +225,7 @@ static void handle_tx(struct vhost_net *net)
 	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
 	if (wmem >= sock->sk->sk_sndbuf) {
 		mutex_lock(&vq->mutex);
-		tx_poll_start(net, sock);
+		vhost_poll_start(poll, sock->file);
 		mutex_unlock(&vq->mutex);
 		return;
 	}
@@ -265,7 +234,7 @@ static void handle_tx(struct vhost_net *net)
 	vhost_disable_notify(&net->dev, vq);
 
 	if (wmem < sock->sk->sk_sndbuf / 2)
-		tx_poll_stop(net);
+		vhost_poll_stop(poll);
 	hdr_size = vq->vhost_hlen;
 	zcopy = vq->ubufs;
 
@@ -287,7 +256,7 @@ static void handle_tx(struct vhost_net *net)
 
 			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
 			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
-				tx_poll_start(net, sock);
+				vhost_poll_start(poll, sock->file);
 				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
 				break;
 			}
@@ -298,7 +267,7 @@ static void handle_tx(struct vhost_net *net)
 				    (vq->upend_idx - vq->done_idx) :
 				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
 			if (unlikely(num_pends > VHOST_MAX_PEND)) {
-				tx_poll_start(net, sock);
+				vhost_poll_start(poll, sock->file);
 				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
 				break;
 			}
@@ -364,7 +333,7 @@ static void handle_tx(struct vhost_net *net)
 			}
 			vhost_discard_vq_desc(vq, 1);
 			if (err == -EAGAIN || err == -ENOBUFS)
-				tx_poll_start(net, sock);
+				vhost_poll_start(poll, sock->file);
 			break;
 		}
 		if (err != len)
@@ -627,7 +596,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 
 	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
 	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
-	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
 
 	f->private_data = n;
 
@@ -637,32 +605,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 static void vhost_net_disable_vq(struct vhost_net *n,
 				 struct vhost_virtqueue *vq)
 {
+	struct vhost_poll *poll = n->poll + (vq - n->vqs);
 	if (!vq->private_data)
 		return;
-	if (vq == n->vqs + VHOST_NET_VQ_TX) {
-		tx_poll_stop(n);
-		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
-	} else
-		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+	vhost_poll_stop(poll);
 }
 
 static int vhost_net_enable_vq(struct vhost_net *n,
 				struct vhost_virtqueue *vq)
 {
+	struct vhost_poll *poll = n->poll + (vq - n->vqs);
 	struct socket *sock;
-	int ret;
 
 	sock = rcu_dereference_protected(vq->private_data,
 					 lockdep_is_held(&vq->mutex));
 	if (!sock)
 		return 0;
-	if (vq == n->vqs + VHOST_NET_VQ_TX) {
-		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
-		ret = tx_poll_start(n, sock);
-	} else
-		ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
 
-	return ret;
+	return vhost_poll_start(poll, sock->file);
 }
 
 static struct socket *vhost_net_stop_vq(struct vhost_net *n,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9759249..4eecdb8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
 	unsigned long mask;
 	int ret = 0;
 
+	if (poll->wqh)
+		return 0;
+
 	mask = file->f_op->poll(file, &poll->table);
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-04-11 20:17 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-04-11  6:50 [PATCH] vhost_net: remove tx polling state Jason Wang
2013-04-11  7:24 ` Michael S. Tsirkin
2013-04-11 20:16   ` David Miller
  -- strict thread matches above, loose matches on Subject: below --
2013-03-07  4:31 Jason Wang
2013-03-07 21:25 ` David Miller
2013-03-10 16:50 ` Michael S. Tsirkin
2013-03-11  7:09   ` Jason Wang
2013-03-11  7:33     ` Jason Wang
2013-03-11  8:26       ` Michael S. Tsirkin
2013-03-11  8:29     ` Michael S. Tsirkin
2013-03-11  8:45       ` Jason Wang
2013-03-11  8:47 ` Michael S. Tsirkin
