All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop
@ 2018-06-30  6:33 xiangxia.m.yue
  2018-06-30  6:33 ` [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one xiangxia.m.yue
                   ` (7 more replies)
  0 siblings, 8 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang; +Cc: mst, makita.toshiaki, virtualization, netdev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

These patches improve the guest receive and transmit performance.
On the handle_tx side, we poll the sock receive queue at the same time;
handle_rx does the same.

These patches are split from a previous big patch:
http://patchwork.ozlabs.org/patch/934673/

For more performance report, see patch 4.

Tonghao Zhang (4):
  net: vhost: lock the vqs one by one
  net: vhost: replace magic number of lock annotation
  net: vhost: factor out busy polling logic to vhost_net_busy_poll()
  net: vhost: add rx busy polling in tx path

 drivers/vhost/net.c   | 106 +++++++++++++++++++++++++++-----------------------
 drivers/vhost/vhost.c |  24 ++++--------
 2 files changed, 65 insertions(+), 65 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
  2018-06-30  6:33 ` [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-07-02  2:21   ` Jason Wang
  2018-07-02  2:21   ` Jason Wang
  2018-06-30  6:33 ` [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation xiangxia.m.yue
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang
  Cc: mst, makita.toshiaki, virtualization, netdev, Tonghao Zhang,
	Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch changes the locking from taking all vq locks
at the same time to taking them one by one. It will
be used by the next patch to avoid a deadlock.

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/vhost.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 895eaa2..4ca9383 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 {
 	int i;
 
-	for (i = 0; i < d->nvqs; ++i)
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
 		__vhost_vq_meta_reset(d->vqs[i]);
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -887,20 +890,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 #define vhost_get_used(vq, x, ptr) \
 	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
 
-static void vhost_dev_lock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_lock_nested(&d->vqs[i]->mutex, i);
-}
-
-static void vhost_dev_unlock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_unlock(&d->vqs[i]->mutex);
-}
-
 static int vhost_new_umem_range(struct vhost_umem *umem,
 				u64 start, u64 size, u64 end,
 				u64 userspace_addr, int perm)
@@ -950,7 +939,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
 		if (msg->iova <= vq_msg->iova &&
 		    msg->iova + msg->size - 1 > vq_msg->iova &&
 		    vq_msg->type == VHOST_IOTLB_MISS) {
+			mutex_lock(&node->vq->mutex);
 			vhost_poll_queue(&node->vq->poll);
+			mutex_unlock(&node->vq->mutex);
+
 			list_del(&node->node);
 			kfree(node);
 		}
@@ -982,7 +974,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 	int ret = 0;
 
 	mutex_lock(&dev->mutex);
-	vhost_dev_lock_vqs(dev);
 	switch (msg->type) {
 	case VHOST_IOTLB_UPDATE:
 		if (!dev->iotlb) {
@@ -1016,7 +1007,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		break;
 	}
 
-	vhost_dev_unlock_vqs(dev);
 	mutex_unlock(&dev->mutex);
 
 	return ret;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-06-30  6:33 ` xiangxia.m.yue
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang; +Cc: mst, netdev, virtualization, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch changes the locking from taking all vq locks
at the same time to taking them one by one. It will
be used by the next patch to avoid a deadlock.

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/vhost.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 895eaa2..4ca9383 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 {
 	int i;
 
-	for (i = 0; i < d->nvqs; ++i)
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
 		__vhost_vq_meta_reset(d->vqs[i]);
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -887,20 +890,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 #define vhost_get_used(vq, x, ptr) \
 	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
 
-static void vhost_dev_lock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_lock_nested(&d->vqs[i]->mutex, i);
-}
-
-static void vhost_dev_unlock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_unlock(&d->vqs[i]->mutex);
-}
-
 static int vhost_new_umem_range(struct vhost_umem *umem,
 				u64 start, u64 size, u64 end,
 				u64 userspace_addr, int perm)
@@ -950,7 +939,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
 		if (msg->iova <= vq_msg->iova &&
 		    msg->iova + msg->size - 1 > vq_msg->iova &&
 		    vq_msg->type == VHOST_IOTLB_MISS) {
+			mutex_lock(&node->vq->mutex);
 			vhost_poll_queue(&node->vq->poll);
+			mutex_unlock(&node->vq->mutex);
+
 			list_del(&node->node);
 			kfree(node);
 		}
@@ -982,7 +974,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 	int ret = 0;
 
 	mutex_lock(&dev->mutex);
-	vhost_dev_lock_vqs(dev);
 	switch (msg->type) {
 	case VHOST_IOTLB_UPDATE:
 		if (!dev->iotlb) {
@@ -1016,7 +1007,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		break;
 	}
 
-	vhost_dev_unlock_vqs(dev);
 	mutex_unlock(&dev->mutex);
 
 	return ret;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
  2018-06-30  6:33 ` [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one xiangxia.m.yue
  2018-06-30  6:33 ` xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-07-02  2:21   ` Jason Wang
  2018-06-30  6:33 ` xiangxia.m.yue
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang
  Cc: mst, makita.toshiaki, virtualization, netdev, Tonghao Zhang,
	Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Use the VHOST_NET_VQ_XXX as a subclass for mutex_lock_nested.

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e7cf7d2..62bb8e8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -484,7 +484,7 @@ static void handle_tx(struct vhost_net *net)
 	bool zcopy, zcopy_used;
 	int sent_pkts = 0;
 
-	mutex_lock(&vq->mutex);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
@@ -655,7 +655,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 		/* Flush batched heads first */
 		vhost_rx_signal_used(rvq);
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, 1);
+		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 		vhost_disable_notify(&net->dev, vq);
 
 		preempt_disable();
@@ -789,7 +789,7 @@ static void handle_rx(struct vhost_net *net)
 	__virtio16 num_buffers;
 	int recv_pkts = 0;
 
-	mutex_lock_nested(&vq->mutex, 0);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
                   ` (2 preceding siblings ...)
  2018-06-30  6:33 ` [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-06-30  6:33 ` [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll() xiangxia.m.yue
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang; +Cc: mst, netdev, virtualization, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Use the VHOST_NET_VQ_XXX as a subclass for mutex_lock_nested.

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e7cf7d2..62bb8e8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -484,7 +484,7 @@ static void handle_tx(struct vhost_net *net)
 	bool zcopy, zcopy_used;
 	int sent_pkts = 0;
 
-	mutex_lock(&vq->mutex);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
@@ -655,7 +655,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 		/* Flush batched heads first */
 		vhost_rx_signal_used(rvq);
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, 1);
+		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 		vhost_disable_notify(&net->dev, vq);
 
 		preempt_disable();
@@ -789,7 +789,7 @@ static void handle_rx(struct vhost_net *net)
 	__virtio16 num_buffers;
 	int recv_pkts = 0;
 
-	mutex_lock_nested(&vq->mutex, 0);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
                   ` (3 preceding siblings ...)
  2018-06-30  6:33 ` xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-07-02  2:29   ` Jason Wang
  2018-06-30  6:33 ` xiangxia.m.yue
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang
  Cc: mst, makita.toshiaki, virtualization, netdev, Tonghao Zhang,
	Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Factor out the generic busy polling logic; it will be
used for the tx path in the next patch. With this patch,
qemu can set the busyloop_timeout differently for the rx queue.

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/net.c | 92 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 62bb8e8..458f81d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -429,6 +429,50 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
+static int sk_has_rx_data(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
+	return skb_queue_empty(&sk->sk_receive_queue);
+}
+
+static void vhost_net_busy_poll(struct vhost_net *net,
+				struct vhost_virtqueue *rvq,
+				struct vhost_virtqueue *tvq,
+				bool rx)
+{
+	unsigned long uninitialized_var(endtime);
+	struct socket *sock = rvq->private_data;
+	struct vhost_virtqueue *vq = rx ? tvq : rvq;
+	unsigned long busyloop_timeout = rx ? rvq->busyloop_timeout :
+					      tvq->busyloop_timeout;
+
+	mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);
+	vhost_disable_notify(&net->dev, vq);
+
+	preempt_disable();
+	endtime = busy_clock() + busyloop_timeout;
+	while (vhost_can_busy_poll(tvq->dev, endtime) &&
+	       !(sock && sk_has_rx_data(sock->sk)) &&
+	       vhost_vq_avail_empty(tvq->dev, tvq))
+		cpu_relax();
+	preempt_enable();
+
+	if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
+	    (!rx && (sock && sk_has_rx_data(sock->sk)))) {
+		vhost_poll_queue(&vq->poll);
+	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+		vhost_disable_notify(&net->dev, vq);
+		vhost_poll_queue(&vq->poll);
+	}
+
+	mutex_unlock(&vq->mutex);
+}
+
+
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct vhost_virtqueue *vq,
 				    struct iovec iov[], unsigned int iov_size,
@@ -621,16 +665,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 	return len;
 }
 
-static int sk_has_rx_data(struct sock *sk)
-{
-	struct socket *sock = sk->sk_socket;
-
-	if (sock->ops->peek_len)
-		return sock->ops->peek_len(sock);
-
-	return skb_queue_empty(&sk->sk_receive_queue);
-}
-
 static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_virtqueue *vq = &nvq->vq;
@@ -645,39 +679,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
 
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 {
-	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
-	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
-	struct vhost_virtqueue *vq = &nvq->vq;
-	unsigned long uninitialized_var(endtime);
-	int len = peek_head_len(rvq, sk);
+	struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_net_virtqueue *nvq_tx = &net->vqs[VHOST_NET_VQ_TX];
 
-	if (!len && vq->busyloop_timeout) {
-		/* Flush batched heads first */
-		vhost_rx_signal_used(rvq);
-		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
-		vhost_disable_notify(&net->dev, vq);
+	int len = peek_head_len(nvq_rx, sk);
 
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-
-		while (vhost_can_busy_poll(&net->dev, endtime) &&
-		       !sk_has_rx_data(sk) &&
-		       vhost_vq_avail_empty(&net->dev, vq))
-			cpu_relax();
-
-		preempt_enable();
-
-		if (!vhost_vq_avail_empty(&net->dev, vq))
-			vhost_poll_queue(&vq->poll);
-		else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-			vhost_disable_notify(&net->dev, vq);
-			vhost_poll_queue(&vq->poll);
-		}
+	if (!len && nvq_rx->vq.busyloop_timeout) {
+		/* Flush batched heads first */
+		vhost_rx_signal_used(nvq_rx);
 
-		mutex_unlock(&vq->mutex);
+		/* Both tx vq and rx socket were polled here */
+		vhost_net_busy_poll(net, &nvq_rx->vq, &nvq_tx->vq, true);
 
-		len = peek_head_len(rvq, sk);
+		len = peek_head_len(nvq_rx, sk);
 	}
 
 	return len;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
                   ` (4 preceding siblings ...)
  2018-06-30  6:33 ` [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll() xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-06-30  6:33 ` [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path xiangxia.m.yue
  2018-06-30  6:33 ` xiangxia.m.yue
  7 siblings, 0 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang; +Cc: mst, netdev, virtualization, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Factor out the generic busy polling logic; it will be
used for the tx path in the next patch. With this patch,
qemu can set the busyloop_timeout differently for the rx queue.

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/net.c | 92 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 62bb8e8..458f81d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -429,6 +429,50 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
+static int sk_has_rx_data(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
+	return skb_queue_empty(&sk->sk_receive_queue);
+}
+
+static void vhost_net_busy_poll(struct vhost_net *net,
+				struct vhost_virtqueue *rvq,
+				struct vhost_virtqueue *tvq,
+				bool rx)
+{
+	unsigned long uninitialized_var(endtime);
+	struct socket *sock = rvq->private_data;
+	struct vhost_virtqueue *vq = rx ? tvq : rvq;
+	unsigned long busyloop_timeout = rx ? rvq->busyloop_timeout :
+					      tvq->busyloop_timeout;
+
+	mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);
+	vhost_disable_notify(&net->dev, vq);
+
+	preempt_disable();
+	endtime = busy_clock() + busyloop_timeout;
+	while (vhost_can_busy_poll(tvq->dev, endtime) &&
+	       !(sock && sk_has_rx_data(sock->sk)) &&
+	       vhost_vq_avail_empty(tvq->dev, tvq))
+		cpu_relax();
+	preempt_enable();
+
+	if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
+	    (!rx && (sock && sk_has_rx_data(sock->sk)))) {
+		vhost_poll_queue(&vq->poll);
+	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+		vhost_disable_notify(&net->dev, vq);
+		vhost_poll_queue(&vq->poll);
+	}
+
+	mutex_unlock(&vq->mutex);
+}
+
+
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct vhost_virtqueue *vq,
 				    struct iovec iov[], unsigned int iov_size,
@@ -621,16 +665,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 	return len;
 }
 
-static int sk_has_rx_data(struct sock *sk)
-{
-	struct socket *sock = sk->sk_socket;
-
-	if (sock->ops->peek_len)
-		return sock->ops->peek_len(sock);
-
-	return skb_queue_empty(&sk->sk_receive_queue);
-}
-
 static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_virtqueue *vq = &nvq->vq;
@@ -645,39 +679,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
 
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 {
-	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
-	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
-	struct vhost_virtqueue *vq = &nvq->vq;
-	unsigned long uninitialized_var(endtime);
-	int len = peek_head_len(rvq, sk);
+	struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_net_virtqueue *nvq_tx = &net->vqs[VHOST_NET_VQ_TX];
 
-	if (!len && vq->busyloop_timeout) {
-		/* Flush batched heads first */
-		vhost_rx_signal_used(rvq);
-		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
-		vhost_disable_notify(&net->dev, vq);
+	int len = peek_head_len(nvq_rx, sk);
 
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-
-		while (vhost_can_busy_poll(&net->dev, endtime) &&
-		       !sk_has_rx_data(sk) &&
-		       vhost_vq_avail_empty(&net->dev, vq))
-			cpu_relax();
-
-		preempt_enable();
-
-		if (!vhost_vq_avail_empty(&net->dev, vq))
-			vhost_poll_queue(&vq->poll);
-		else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-			vhost_disable_notify(&net->dev, vq);
-			vhost_poll_queue(&vq->poll);
-		}
+	if (!len && nvq_rx->vq.busyloop_timeout) {
+		/* Flush batched heads first */
+		vhost_rx_signal_used(nvq_rx);
 
-		mutex_unlock(&vq->mutex);
+		/* Both tx vq and rx socket were polled here */
+		vhost_net_busy_poll(net, &nvq_rx->vq, &nvq_tx->vq, true);
 
-		len = peek_head_len(rvq, sk);
+		len = peek_head_len(nvq_rx, sk);
 	}
 
 	return len;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
                   ` (6 preceding siblings ...)
  2018-06-30  6:33 ` [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-06-30  7:03   ` Jesper Dangaard Brouer
  2018-07-02  2:32   ` Jason Wang
  7 siblings, 2 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang
  Cc: mst, makita.toshiaki, virtualization, netdev, Tonghao Zhang,
	Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch improves the guest receive and transmit performance.
On the handle_tx side, we poll the sock receive queue at the
same time. handle_rx do that in the same way.

We set the poll-us=100us and use the iperf3 to test
its bandwidth, use the netperf to test throughput and mean
latency. When running the tests, the vhost-net kthread of
that VM is always at 100% CPU. The commands are shown as below.

iperf3  -s -D
iperf3  -c IP -i 1 -P 1 -t 20 -M 1400

or
netserver
netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY"

host -> guest:
iperf3:
* With the patch:     27.0 Gbits/sec
* Without the patch:  14.4 Gbits/sec

netperf (TCP_RR):
* With the patch:     48039.56 trans/s, 20.64us mean latency
* Without the patch:  46027.07 trans/s, 21.58us mean latency

This patch also improves the guest transmit performance.

guest -> host:
iperf3:
* With the patch:     27.2 Gbits/sec
* Without the patch:  24.4 Gbits/sec

netperf (TCP_RR):
* With the patch:     47963.25 trans/s, 20.71us mean latency
* Without the patch:  45796.70 trans/s, 21.68us mean latency

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/net.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 458f81d..fb43d82 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -478,17 +478,13 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct iovec iov[], unsigned int iov_size,
 				    unsigned int *out_num, unsigned int *in_num)
 {
-	unsigned long uninitialized_var(endtime);
+	struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
 	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				  out_num, in_num, NULL, NULL);
 
 	if (r == vq->num && vq->busyloop_timeout) {
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(vq->dev, endtime) &&
-		       vhost_vq_avail_empty(vq->dev, vq))
-			cpu_relax();
-		preempt_enable();
+		vhost_net_busy_poll(net, &nvq_rx->vq, vq, false);
+
 		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				      out_num, in_num, NULL, NULL);
 	}
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path
  2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
                   ` (5 preceding siblings ...)
  2018-06-30  6:33 ` xiangxia.m.yue
@ 2018-06-30  6:33 ` xiangxia.m.yue
  2018-06-30  6:33 ` xiangxia.m.yue
  7 siblings, 0 replies; 17+ messages in thread
From: xiangxia.m.yue @ 2018-06-30  6:33 UTC (permalink / raw)
  To: jasowang; +Cc: mst, netdev, virtualization, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch improves the guest receive and transmit performance.
On the handle_tx side, we poll the sock receive queue at the
same time. handle_rx do that in the same way.

We set the poll-us=100us and use the iperf3 to test
its bandwidth, use the netperf to test throughput and mean
latency. When running the tests, the vhost-net kthread of
that VM is always at 100% CPU. The commands are shown as below.

iperf3  -s -D
iperf3  -c IP -i 1 -P 1 -t 20 -M 1400

or
netserver
netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY"

host -> guest:
iperf3:
* With the patch:     27.0 Gbits/sec
* Without the patch:  14.4 Gbits/sec

netperf (TCP_RR):
* With the patch:     48039.56 trans/s, 20.64us mean latency
* Without the patch:  46027.07 trans/s, 21.58us mean latency

This patch also improves the guest transmit performance.

guest -> host:
iperf3:
* With the patch:     27.2 Gbits/sec
* Without the patch:  24.4 Gbits/sec

netperf (TCP_RR):
* With the patch:     47963.25 trans/s, 20.71us mean latency
* Without the patch:  45796.70 trans/s, 21.68us mean latency

Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
---
 drivers/vhost/net.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 458f81d..fb43d82 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -478,17 +478,13 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct iovec iov[], unsigned int iov_size,
 				    unsigned int *out_num, unsigned int *in_num)
 {
-	unsigned long uninitialized_var(endtime);
+	struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
 	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				  out_num, in_num, NULL, NULL);
 
 	if (r == vq->num && vq->busyloop_timeout) {
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(vq->dev, endtime) &&
-		       vhost_vq_avail_empty(vq->dev, vq))
-			cpu_relax();
-		preempt_enable();
+		vhost_net_busy_poll(net, &nvq_rx->vq, vq, false);
+
 		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				      out_num, in_num, NULL, NULL);
 	}
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path
  2018-06-30  6:33 ` xiangxia.m.yue
@ 2018-06-30  7:03   ` Jesper Dangaard Brouer
  2018-06-30  8:48     ` Tonghao Zhang
  2018-07-02  2:32   ` Jason Wang
  1 sibling, 1 reply; 17+ messages in thread
From: Jesper Dangaard Brouer @ 2018-06-30  7:03 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: mst, netdev, brouer, virtualization, Tonghao Zhang

On Fri, 29 Jun 2018 23:33:58 -0700
xiangxia.m.yue@gmail.com wrote:

> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> This patch improves the guest receive and transmit performance.
> On the handle_tx side, we poll the sock receive queue at the
> same time. handle_rx do that in the same way.
> 
> We set the poll-us=100us and use the iperf3 to test

Where/how do you configure poll-us=100us ?

Are you talking about /proc/sys/net/core/busy_poll ?


p.s. Nice performance boost! :-)

> its bandwidth, use the netperf to test throughput and mean
> latency. When running the tests, the vhost-net kthread of
> that VM, is alway 100% CPU. The commands are shown as below.
> 
> iperf3  -s -D
> iperf3  -c IP -i 1 -P 1 -t 20 -M 1400
> 
> or
> netserver
> netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY"
> 
> host -> guest:
> iperf3:
> * With the patch:     27.0 Gbits/sec
> * Without the patch:  14.4 Gbits/sec
> 
> netperf (TCP_RR):
> * With the patch:     48039.56 trans/s, 20.64us mean latency
> * Without the patch:  46027.07 trans/s, 21.58us mean latency
> 
> This patch also improves the guest transmit performance.
> 
> guest -> host:
> iperf3:
> * With the patch:     27.2 Gbits/sec
> * Without the patch:  24.4 Gbits/sec
> 
> netperf (TCP_RR):
> * With the patch:     47963.25 trans/s, 20.71us mean latency
> * Without the patch:  45796.70 trans/s, 21.68us mean latency
> 
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path
  2018-06-30  7:03   ` Jesper Dangaard Brouer
@ 2018-06-30  8:48     ` Tonghao Zhang
  0 siblings, 0 replies; 17+ messages in thread
From: Tonghao Zhang @ 2018-06-30  8:48 UTC (permalink / raw)
  To: brouer
  Cc: mst, Linux Kernel Network Developers, virtualization, Tonghao Zhang

On Sat, Jun 30, 2018 at 3:03 PM Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
>
> On Fri, 29 Jun 2018 23:33:58 -0700
> xiangxia.m.yue@gmail.com wrote:
>
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > This patch improves the guest receive and transmit performance.
> > On the handle_tx side, we poll the sock receive queue at the
> > same time. handle_rx do that in the same way.
> >
> > We set the poll-us=100us and use the iperf3 to test
>
> Where/how do you configure poll-us=100us ?
>
> Are you talking about /proc/sys/net/core/busy_poll ?
No, we set the poll-us in qemu. e.g
-netdev tap,ifname=tap0,id=hostnet0,vhost=on,script=no,downscript=no,poll-us=100

>
> p.s. Nice performance boost! :-)
>
> > its bandwidth, use the netperf to test throughput and mean
> > latency. When running the tests, the vhost-net kthread of
> > that VM, is alway 100% CPU. The commands are shown as below.
> >
> > iperf3  -s -D
> > iperf3  -c IP -i 1 -P 1 -t 20 -M 1400
> >
> > or
> > netserver
> > netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY"
> >
> > host -> guest:
> > iperf3:
> > * With the patch:     27.0 Gbits/sec
> > * Without the patch:  14.4 Gbits/sec
> >
> > netperf (TCP_RR):
> > * With the patch:     48039.56 trans/s, 20.64us mean latency
> > * Without the patch:  46027.07 trans/s, 21.58us mean latency
> >
> > This patch also improves the guest transmit performance.
> >
> > guest -> host:
> > iperf3:
> > * With the patch:     27.2 Gbits/sec
> > * Without the patch:  24.4 Gbits/sec
> >
> > netperf (TCP_RR):
> > * With the patch:     47963.25 trans/s, 20.71us mean latency
> > * Without the patch:  45796.70 trans/s, 21.68us mean latency
> >
> > Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
>
> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one
  2018-06-30  6:33 ` xiangxia.m.yue
@ 2018-07-02  2:21   ` Jason Wang
  2018-07-02  2:21   ` Jason Wang
  1 sibling, 0 replies; 17+ messages in thread
From: Jason Wang @ 2018-07-02  2:21 UTC (permalink / raw)
  To: xiangxia.m.yue
  Cc: mst, makita.toshiaki, virtualization, netdev, Tonghao Zhang



On 2018年06月30日 14:33, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patch changes the way of locking all vqs
> at the same time, to locking them one by one. It will
> be used by the next patch to avoid the deadlock.
>
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
> ---
>   drivers/vhost/vhost.c | 24 +++++++-----------------
>   1 file changed, 7 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 895eaa2..4ca9383 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
>   {
>   	int i;
>   
> -	for (i = 0; i < d->nvqs; ++i)
> +	for (i = 0; i < d->nvqs; ++i) {
> +		mutex_lock(&d->vqs[i]->mutex);
>   		__vhost_vq_meta_reset(d->vqs[i]);
> +		mutex_unlock(&d->vqs[i]->mutex);
> +	}
>   }
>   
>   static void vhost_vq_reset(struct vhost_dev *dev,
> @@ -887,20 +890,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
>   #define vhost_get_used(vq, x, ptr) \
>   	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
>   
> -static void vhost_dev_lock_vqs(struct vhost_dev *d)
> -{
> -	int i = 0;
> -	for (i = 0; i < d->nvqs; ++i)
> -		mutex_lock_nested(&d->vqs[i]->mutex, i);
> -}
> -
> -static void vhost_dev_unlock_vqs(struct vhost_dev *d)
> -{
> -	int i = 0;
> -	for (i = 0; i < d->nvqs; ++i)
> -		mutex_unlock(&d->vqs[i]->mutex);
> -}
> -
>   static int vhost_new_umem_range(struct vhost_umem *umem,
>   				u64 start, u64 size, u64 end,
>   				u64 userspace_addr, int perm)
> @@ -950,7 +939,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
>   		if (msg->iova <= vq_msg->iova &&
>   		    msg->iova + msg->size - 1 > vq_msg->iova &&
>   		    vq_msg->type == VHOST_IOTLB_MISS) {
> +			mutex_lock(&node->vq->mutex);
>   			vhost_poll_queue(&node->vq->poll);
> +			mutex_unlock(&node->vq->mutex);
> +
>   			list_del(&node->node);
>   			kfree(node);
>   		}
> @@ -982,7 +974,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
>   	int ret = 0;
>   
>   	mutex_lock(&dev->mutex);
> -	vhost_dev_lock_vqs(dev);
>   	switch (msg->type) {
>   	case VHOST_IOTLB_UPDATE:
>   		if (!dev->iotlb) {
> @@ -1016,7 +1007,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
>   		break;
>   	}
>   
> -	vhost_dev_unlock_vqs(dev);
>   	mutex_unlock(&dev->mutex);
>   
>   	return ret;

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>

Thanks

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one
  2018-06-30  6:33 ` xiangxia.m.yue
  2018-07-02  2:21   ` Jason Wang
@ 2018-07-02  2:21   ` Jason Wang
  1 sibling, 0 replies; 17+ messages in thread
From: Jason Wang @ 2018-07-02  2:21 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: netdev, virtualization, Tonghao Zhang, mst



On 2018年06月30日 14:33, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patch changes the way of locking all vqs
> at the same time, to locking them one by one. It will
> be used by the next patch to avoid the deadlock.
>
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
> ---
>   drivers/vhost/vhost.c | 24 +++++++-----------------
>   1 file changed, 7 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 895eaa2..4ca9383 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
>   {
>   	int i;
>   
> -	for (i = 0; i < d->nvqs; ++i)
> +	for (i = 0; i < d->nvqs; ++i) {
> +		mutex_lock(&d->vqs[i]->mutex);
>   		__vhost_vq_meta_reset(d->vqs[i]);
> +		mutex_unlock(&d->vqs[i]->mutex);
> +	}
>   }
>   
>   static void vhost_vq_reset(struct vhost_dev *dev,
> @@ -887,20 +890,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
>   #define vhost_get_used(vq, x, ptr) \
>   	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
>   
> -static void vhost_dev_lock_vqs(struct vhost_dev *d)
> -{
> -	int i = 0;
> -	for (i = 0; i < d->nvqs; ++i)
> -		mutex_lock_nested(&d->vqs[i]->mutex, i);
> -}
> -
> -static void vhost_dev_unlock_vqs(struct vhost_dev *d)
> -{
> -	int i = 0;
> -	for (i = 0; i < d->nvqs; ++i)
> -		mutex_unlock(&d->vqs[i]->mutex);
> -}
> -
>   static int vhost_new_umem_range(struct vhost_umem *umem,
>   				u64 start, u64 size, u64 end,
>   				u64 userspace_addr, int perm)
> @@ -950,7 +939,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
>   		if (msg->iova <= vq_msg->iova &&
>   		    msg->iova + msg->size - 1 > vq_msg->iova &&
>   		    vq_msg->type == VHOST_IOTLB_MISS) {
> +			mutex_lock(&node->vq->mutex);
>   			vhost_poll_queue(&node->vq->poll);
> +			mutex_unlock(&node->vq->mutex);
> +
>   			list_del(&node->node);
>   			kfree(node);
>   		}
> @@ -982,7 +974,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
>   	int ret = 0;
>   
>   	mutex_lock(&dev->mutex);
> -	vhost_dev_lock_vqs(dev);
>   	switch (msg->type) {
>   	case VHOST_IOTLB_UPDATE:
>   		if (!dev->iotlb) {
> @@ -1016,7 +1007,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
>   		break;
>   	}
>   
> -	vhost_dev_unlock_vqs(dev);
>   	mutex_unlock(&dev->mutex);
>   
>   	return ret;

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>

Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation
  2018-06-30  6:33 ` [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation xiangxia.m.yue
@ 2018-07-02  2:21   ` Jason Wang
  0 siblings, 0 replies; 17+ messages in thread
From: Jason Wang @ 2018-07-02  2:21 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: netdev, virtualization, Tonghao Zhang, mst



On 2018年06月30日 14:33, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Use the VHOST_NET_VQ_XXX as a subclass for mutex_lock_nested.
>
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
> ---
>   drivers/vhost/net.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index e7cf7d2..62bb8e8 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -484,7 +484,7 @@ static void handle_tx(struct vhost_net *net)
>   	bool zcopy, zcopy_used;
>   	int sent_pkts = 0;
>   
> -	mutex_lock(&vq->mutex);
> +	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
>   	sock = vq->private_data;
>   	if (!sock)
>   		goto out;
> @@ -655,7 +655,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
>   		/* Flush batched heads first */
>   		vhost_rx_signal_used(rvq);
>   		/* Both tx vq and rx socket were polled here */
> -		mutex_lock_nested(&vq->mutex, 1);
> +		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
>   		vhost_disable_notify(&net->dev, vq);
>   
>   		preempt_disable();
> @@ -789,7 +789,7 @@ static void handle_rx(struct vhost_net *net)
>   	__virtio16 num_buffers;
>   	int recv_pkts = 0;
>   
> -	mutex_lock_nested(&vq->mutex, 0);
> +	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
>   	sock = vq->private_data;
>   	if (!sock)
>   		goto out;

Acked-by: Jason Wang <jasowang@redhat.com>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
  2018-06-30  6:33 ` [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll() xiangxia.m.yue
@ 2018-07-02  2:29   ` Jason Wang
  2018-07-02  4:05     ` Tonghao Zhang
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Wang @ 2018-07-02  2:29 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: netdev, virtualization, Tonghao Zhang, mst



On 2018年06月30日 14:33, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Factor out generic busy polling logic and will be
> used for tx path in the next patch. And with the patch,
> qemu can set differently the busyloop_timeout for rx queue.
>
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
> ---
>   drivers/vhost/net.c | 92 ++++++++++++++++++++++++++++++-----------------------
>   1 file changed, 53 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 62bb8e8..458f81d 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -429,6 +429,50 @@ static int vhost_net_enable_vq(struct vhost_net *n,
>   	return vhost_poll_start(poll, sock->file);
>   }
>   
> +static int sk_has_rx_data(struct sock *sk)
> +{
> +	struct socket *sock = sk->sk_socket;
> +
> +	if (sock->ops->peek_len)
> +		return sock->ops->peek_len(sock);
> +
> +	return skb_queue_empty(&sk->sk_receive_queue);
> +}
> +
> +static void vhost_net_busy_poll(struct vhost_net *net,
> +				struct vhost_virtqueue *rvq,
> +				struct vhost_virtqueue *tvq,
> +				bool rx)
> +{
> +	unsigned long uninitialized_var(endtime);
> +	struct socket *sock = rvq->private_data;
> +	struct vhost_virtqueue *vq = rx ? tvq : rvq;
> +	unsigned long busyloop_timeout = rx ? rvq->busyloop_timeout :
> +					      tvq->busyloop_timeout;

As simple as vq->busyloop_timeout?

> +
> +	mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);

We need move sock = rvq->private_data under the protection of vq mutex 
if rx is false.

> +	vhost_disable_notify(&net->dev, vq);
> +
> +	preempt_disable();
> +	endtime = busy_clock() + busyloop_timeout;
> +	while (vhost_can_busy_poll(tvq->dev, endtime) &&
> +	       !(sock && sk_has_rx_data(sock->sk)) &&
> +	       vhost_vq_avail_empty(tvq->dev, tvq))
> +		cpu_relax();
> +	preempt_enable();
> +
> +	if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
> +	    (!rx && (sock && sk_has_rx_data(sock->sk)))) {
> +		vhost_poll_queue(&vq->poll);
> +	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> +		vhost_disable_notify(&net->dev, vq);
> +		vhost_poll_queue(&vq->poll);
> +	}
> +
> +	mutex_unlock(&vq->mutex);
> +}
> +
> +
>   static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
>   				    struct vhost_virtqueue *vq,
>   				    struct iovec iov[], unsigned int iov_size,
> @@ -621,16 +665,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
>   	return len;
>   }
>   
> -static int sk_has_rx_data(struct sock *sk)
> -{
> -	struct socket *sock = sk->sk_socket;
> -
> -	if (sock->ops->peek_len)
> -		return sock->ops->peek_len(sock);
> -
> -	return skb_queue_empty(&sk->sk_receive_queue);
> -}
> -
>   static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
>   {
>   	struct vhost_virtqueue *vq = &nvq->vq;
> @@ -645,39 +679,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
>   
>   static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
>   {
> -	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
> -	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> -	struct vhost_virtqueue *vq = &nvq->vq;
> -	unsigned long uninitialized_var(endtime);
> -	int len = peek_head_len(rvq, sk);
> +	struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
> +	struct vhost_net_virtqueue *nvq_tx = &net->vqs[VHOST_NET_VQ_TX];

It looks to me rnvq and tnvq is slightly better.

Other looks good to me.

Thanks

>   
> -	if (!len && vq->busyloop_timeout) {
> -		/* Flush batched heads first */
> -		vhost_rx_signal_used(rvq);
> -		/* Both tx vq and rx socket were polled here */
> -		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
> -		vhost_disable_notify(&net->dev, vq);
> +	int len = peek_head_len(nvq_rx, sk);
>   
> -		preempt_disable();
> -		endtime = busy_clock() + vq->busyloop_timeout;
> -
> -		while (vhost_can_busy_poll(&net->dev, endtime) &&
> -		       !sk_has_rx_data(sk) &&
> -		       vhost_vq_avail_empty(&net->dev, vq))
> -			cpu_relax();
> -
> -		preempt_enable();
> -
> -		if (!vhost_vq_avail_empty(&net->dev, vq))
> -			vhost_poll_queue(&vq->poll);
> -		else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> -			vhost_disable_notify(&net->dev, vq);
> -			vhost_poll_queue(&vq->poll);
> -		}
> +	if (!len && nvq_rx->vq.busyloop_timeout) {
> +		/* Flush batched heads first */
> +		vhost_rx_signal_used(nvq_rx);
>   
> -		mutex_unlock(&vq->mutex);
> +		/* Both tx vq and rx socket were polled here */
> +		vhost_net_busy_poll(net, &nvq_rx->vq, &nvq_tx->vq, true);
>   
> -		len = peek_head_len(rvq, sk);
> +		len = peek_head_len(nvq_rx, sk);
>   	}
>   
>   	return len;

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path
  2018-06-30  6:33 ` xiangxia.m.yue
  2018-06-30  7:03   ` Jesper Dangaard Brouer
@ 2018-07-02  2:32   ` Jason Wang
  1 sibling, 0 replies; 17+ messages in thread
From: Jason Wang @ 2018-07-02  2:32 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: netdev, virtualization, Tonghao Zhang, mst



On 2018年06月30日 14:33, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patch improves the guest receive and transmit performance.
> On the handle_tx side, we poll the sock receive queue at the
> same time. handle_rx do that in the same way.
>
> We set the poll-us=100us and use the iperf3 to test
> its bandwidth, use the netperf to test throughput and mean
> latency. When running the tests, the vhost-net kthread of
> that VM is always at 100% CPU. The commands are shown below.
>
> iperf3  -s -D
> iperf3  -c IP -i 1 -P 1 -t 20 -M 1400
>
> or
> netserver
> netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY"
>
> host -> guest:
> iperf3:
> * With the patch:     27.0 Gbits/sec
> * Without the patch:  14.4 Gbits/sec
>
> netperf (TCP_RR):
> * With the patch:     48039.56 trans/s, 20.64us mean latency
> * Without the patch:  46027.07 trans/s, 21.58us mean latency
>
> This patch also improves the guest transmit performance.
>
> guest -> host:
> iperf3:
> * With the patch:     27.2 Gbits/sec
> * Without the patch:  24.4 Gbits/sec
>
> netperf (TCP_RR):
> * With the patch:     47963.25 trans/s, 20.71us mean latency
> * Without the patch:  45796.70 trans/s, 21.68us mean latency
>
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
> ---
>   drivers/vhost/net.c | 10 +++-------
>   1 file changed, 3 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 458f81d..fb43d82 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -478,17 +478,13 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
>   				    struct iovec iov[], unsigned int iov_size,
>   				    unsigned int *out_num, unsigned int *in_num)
>   {
> -	unsigned long uninitialized_var(endtime);
> +	struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
>   	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>   				  out_num, in_num, NULL, NULL);
>   
>   	if (r == vq->num && vq->busyloop_timeout) {
> -		preempt_disable();
> -		endtime = busy_clock() + vq->busyloop_timeout;
> -		while (vhost_can_busy_poll(vq->dev, endtime) &&
> -		       vhost_vq_avail_empty(vq->dev, vq))
> -			cpu_relax();
> -		preempt_enable();
> +		vhost_net_busy_poll(net, &nvq_rx->vq, vq, false);
> +
>   		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>   				      out_num, in_num, NULL, NULL);
>   	}

Looks good to me.

A nit is "rnvq" looks better.

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
  2018-07-02  2:29   ` Jason Wang
@ 2018-07-02  4:05     ` Tonghao Zhang
  0 siblings, 0 replies; 17+ messages in thread
From: Tonghao Zhang @ 2018-07-02  4:05 UTC (permalink / raw)
  To: jasowang
  Cc: Linux Kernel Network Developers, virtualization, Tonghao Zhang, mst

On Mon, Jul 2, 2018 at 10:29 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
>
> On 2018年06月30日 14:33, xiangxia.m.yue@gmail.com wrote:
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > Factor out generic busy polling logic and will be
> > used for tx path in the next patch. And with the patch,
> > qemu can set differently the busyloop_timeout for rx queue.
> >
> > Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
> > ---
> >   drivers/vhost/net.c | 92 ++++++++++++++++++++++++++++++-----------------------
> >   1 file changed, 53 insertions(+), 39 deletions(-)
> >
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > index 62bb8e8..458f81d 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -429,6 +429,50 @@ static int vhost_net_enable_vq(struct vhost_net *n,
> >       return vhost_poll_start(poll, sock->file);
> >   }
> >
> > +static int sk_has_rx_data(struct sock *sk)
> > +{
> > +     struct socket *sock = sk->sk_socket;
> > +
> > +     if (sock->ops->peek_len)
> > +             return sock->ops->peek_len(sock);
> > +
> > +     return skb_queue_empty(&sk->sk_receive_queue);
> > +}
> > +
> > +static void vhost_net_busy_poll(struct vhost_net *net,
> > +                             struct vhost_virtqueue *rvq,
> > +                             struct vhost_virtqueue *tvq,
> > +                             bool rx)
> > +{
> > +     unsigned long uninitialized_var(endtime);
> > +     struct socket *sock = rvq->private_data;
> > +     struct vhost_virtqueue *vq = rx ? tvq : rvq;
> > +     unsigned long busyloop_timeout = rx ? rvq->busyloop_timeout :
> > +                                           tvq->busyloop_timeout;
>
> As simple as vq->busyloop_timeout?
maybe we should allow the user to set busyloop_timeout for rx or tx
differently. This code should be moved under the mutex.

> > +
> > +     mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);
>
> We need move sock = rvq->private_data under the protection of vq mutex
> if rx is false.
yes, thanks for your review.

> > +     vhost_disable_notify(&net->dev, vq);
> > +
> > +     preempt_disable();
> > +     endtime = busy_clock() + busyloop_timeout;
> > +     while (vhost_can_busy_poll(tvq->dev, endtime) &&
> > +            !(sock && sk_has_rx_data(sock->sk)) &&
> > +            vhost_vq_avail_empty(tvq->dev, tvq))
> > +             cpu_relax();
> > +     preempt_enable();
> > +
> > +     if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
> > +         (!rx && (sock && sk_has_rx_data(sock->sk)))) {
> > +             vhost_poll_queue(&vq->poll);
> > +     } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> > +             vhost_disable_notify(&net->dev, vq);
> > +             vhost_poll_queue(&vq->poll);
> > +     }
> > +
> > +     mutex_unlock(&vq->mutex);
> > +}
> > +
> > +
> >   static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
> >                                   struct vhost_virtqueue *vq,
> >                                   struct iovec iov[], unsigned int iov_size,
> > @@ -621,16 +665,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
> >       return len;
> >   }
> >
> > -static int sk_has_rx_data(struct sock *sk)
> > -{
> > -     struct socket *sock = sk->sk_socket;
> > -
> > -     if (sock->ops->peek_len)
> > -             return sock->ops->peek_len(sock);
> > -
> > -     return skb_queue_empty(&sk->sk_receive_queue);
> > -}
> > -
> >   static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
> >   {
> >       struct vhost_virtqueue *vq = &nvq->vq;
> > @@ -645,39 +679,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
> >
> >   static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
> >   {
> > -     struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
> > -     struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> > -     struct vhost_virtqueue *vq = &nvq->vq;
> > -     unsigned long uninitialized_var(endtime);
> > -     int len = peek_head_len(rvq, sk);
> > +     struct vhost_net_virtqueue *nvq_rx = &net->vqs[VHOST_NET_VQ_RX];
> > +     struct vhost_net_virtqueue *nvq_tx = &net->vqs[VHOST_NET_VQ_TX];
>
> It looks to me rnvq and tnvq is slightly better.
yes. patch 4 will also update.

> Other looks good to me.
>
> Thanks
>
> >
> > -     if (!len && vq->busyloop_timeout) {
> > -             /* Flush batched heads first */
> > -             vhost_rx_signal_used(rvq);
> > -             /* Both tx vq and rx socket were polled here */
> > -             mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
> > -             vhost_disable_notify(&net->dev, vq);
> > +     int len = peek_head_len(nvq_rx, sk);
> >
> > -             preempt_disable();
> > -             endtime = busy_clock() + vq->busyloop_timeout;
> > -
> > -             while (vhost_can_busy_poll(&net->dev, endtime) &&
> > -                    !sk_has_rx_data(sk) &&
> > -                    vhost_vq_avail_empty(&net->dev, vq))
> > -                     cpu_relax();
> > -
> > -             preempt_enable();
> > -
> > -             if (!vhost_vq_avail_empty(&net->dev, vq))
> > -                     vhost_poll_queue(&vq->poll);
> > -             else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> > -                     vhost_disable_notify(&net->dev, vq);
> > -                     vhost_poll_queue(&vq->poll);
> > -             }
> > +     if (!len && nvq_rx->vq.busyloop_timeout) {
> > +             /* Flush batched heads first */
> > +             vhost_rx_signal_used(nvq_rx);
> >
> > -             mutex_unlock(&vq->mutex);
> > +             /* Both tx vq and rx socket were polled here */
> > +             vhost_net_busy_poll(net, &nvq_rx->vq, &nvq_tx->vq, true);
> >
> > -             len = peek_head_len(rvq, sk);
> > +             len = peek_head_len(nvq_rx, sk);
> >       }
> >
> >       return len;
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2018-07-02  4:05 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-30  6:33 [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop xiangxia.m.yue
2018-06-30  6:33 ` [PATCH net-next v3 1/4] net: vhost: lock the vqs one by one xiangxia.m.yue
2018-06-30  6:33 ` xiangxia.m.yue
2018-07-02  2:21   ` Jason Wang
2018-07-02  2:21   ` Jason Wang
2018-06-30  6:33 ` [PATCH net-next v3 2/4] net: vhost: replace magic number of lock annotation xiangxia.m.yue
2018-07-02  2:21   ` Jason Wang
2018-06-30  6:33 ` xiangxia.m.yue
2018-06-30  6:33 ` [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll() xiangxia.m.yue
2018-07-02  2:29   ` Jason Wang
2018-07-02  4:05     ` Tonghao Zhang
2018-06-30  6:33 ` xiangxia.m.yue
2018-06-30  6:33 ` [PATCH net-next v3 4/4] net: vhost: add rx busy polling in tx path xiangxia.m.yue
2018-06-30  6:33 ` xiangxia.m.yue
2018-06-30  7:03   ` Jesper Dangaard Brouer
2018-06-30  8:48     ` Tonghao Zhang
2018-07-02  2:32   ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.