All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
@ 2018-12-12  9:29 jiangyiwen
  2018-12-12 15:37 ` Michael S. Tsirkin
                   ` (3 more replies)
  0 siblings, 4 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-12  9:29 UTC (permalink / raw)
  To: Stefan Hajnoczi, Michael S. Tsirkin, Jason Wang
  Cc: netdev, kvm, virtualization

When vhost supports the VIRTIO_VSOCK_F_MRG_RXBUF feature,
it will merge big packets into the rx vq.

Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
---
 drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
 include/linux/virtio_vsock.h      |   1 +
 include/uapi/linux/virtio_vsock.h |   5 ++
 3 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 34bc3ab..dc52b0f 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -22,7 +22,8 @@
 #define VHOST_VSOCK_DEFAULT_HOST_CID	2

 enum {
-	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
+			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
 };

 /* Used to track all the vhost_vsock instances on the system. */
@@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 	return vsock;
 }

+/* This segment of codes are copied from drivers/vhost/net.c */
+static int get_rx_bufs(struct vhost_virtqueue *vq,
+		struct vring_used_elem *heads, int datalen,
+		unsigned *iovcount, unsigned int quota)
+{
+	unsigned int out, in;
+	int seg = 0;
+	int headcount = 0;
+	unsigned d;
+	int ret;
+	/*
+	 * len is always initialized before use since we are always called with
+	 * datalen > 0.
+	 */
+	u32 uninitialized_var(len);
+
+	while (datalen > 0 && headcount < quota) {
+		if (unlikely(seg >= UIO_MAXIOV)) {
+			ret = -ENOBUFS;
+			goto err;
+		}
+
+		ret = vhost_get_vq_desc(vq, vq->iov + seg,
+				ARRAY_SIZE(vq->iov) - seg, &out,
+				&in, NULL, NULL);
+		if (unlikely(ret < 0))
+			goto err;
+
+		d = ret;
+		if (d == vq->num) {
+			ret = 0;
+			goto err;
+		}
+
+		if (unlikely(out || in <= 0)) {
+			vq_err(vq, "unexpected descriptor format for RX: "
+					"out %d, in %d\n", out, in);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		heads[headcount].id = cpu_to_vhost32(vq, d);
+		len = iov_length(vq->iov + seg, in);
+		heads[headcount].len = cpu_to_vhost32(vq, len);
+		datalen -= len;
+		++headcount;
+		seg += in;
+	}
+
+	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+	*iovcount = seg;
+
+	/* Detect overrun */
+	if (unlikely(datalen > 0)) {
+		ret = UIO_MAXIOV + 1;
+		goto err;
+	}
+	return headcount;
+err:
+	vhost_discard_vq_desc(vq, headcount);
+	return ret;
+}
+
 static void
 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 			    struct vhost_virtqueue *vq)
@@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
 	bool added = false;
 	bool restart_tx = false;
+	int mergeable;
+	size_t vsock_hlen;

 	mutex_lock(&vq->mutex);

 	if (!vq->private_data)
 		goto out;

+	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
+	/*
+	 * Guest fill page for rx vq in mergeable case, so it will not
+	 * allocate pkt structure, we should reserve size of pkt in advance.
+	 */
+	if (likely(mergeable))
+		vsock_hlen = sizeof(struct virtio_vsock_pkt);
+	else
+		vsock_hlen = sizeof(struct virtio_vsock_hdr);
+
 	/* Avoid further vmexits, we're already processing the virtqueue */
 	vhost_disable_notify(&vsock->dev, vq);

 	for (;;) {
 		struct virtio_vsock_pkt *pkt;
 		struct iov_iter iov_iter;
-		unsigned out, in;
+		unsigned out = 0, in = 0;
 		size_t nbytes;
 		size_t len;
-		int head;
+		s16 headcount;

 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
@@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 		list_del_init(&pkt->list);
 		spin_unlock_bh(&vsock->send_pkt_list_lock);

-		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-					 &out, &in, NULL, NULL);
-		if (head < 0) {
-			spin_lock_bh(&vsock->send_pkt_list_lock);
-			list_add(&pkt->list, &vsock->send_pkt_list);
-			spin_unlock_bh(&vsock->send_pkt_list_lock);
-			break;
-		}
-
-		if (head == vq->num) {
+		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
+				&in, likely(mergeable) ? UIO_MAXIOV : 1);
+		if (headcount <= 0) {
 			spin_lock_bh(&vsock->send_pkt_list_lock);
 			list_add(&pkt->list, &vsock->send_pkt_list);
 			spin_unlock_bh(&vsock->send_pkt_list_lock);
@@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 			/* We cannot finish yet if more buffers snuck in while
 			 * re-enabling notify.
 			 */
-			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
 				vhost_disable_notify(&vsock->dev, vq);
 				continue;
 			}
 			break;
 		}

-		if (out) {
-			virtio_transport_free_pkt(pkt);
-			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
-			break;
-		}
-
 		len = iov_length(&vq->iov[out], in);
 		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);

-		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
-		if (nbytes != sizeof(pkt->hdr)) {
+		if (likely(mergeable))
+			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
+		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
+		if (nbytes != vsock_hlen) {
 			virtio_transport_free_pkt(pkt);
 			vq_err(vq, "Faulted on copying pkt hdr\n");
 			break;
@@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 			break;
 		}

-		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+		vhost_add_used_n(vq, vq->heads, headcount);
 		added = true;

 		if (pkt->reply) {
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index bf84418..da9e1fe 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -50,6 +50,7 @@ struct virtio_vsock_sock {

 struct virtio_vsock_pkt {
 	struct virtio_vsock_hdr	hdr;
+	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
 	struct work_struct work;
 	struct list_head list;
 	/* socket refcnt not held, only use for cancellation */
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 1d57ed3..2292f30 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
 	__le32	fwd_cnt;
 } __attribute__((packed));

+/* It add mergeable rx buffers feature */
+struct virtio_vsock_mrg_rxbuf_hdr {
+	__le16  num_buffers;    /* number of mergeable rx buffers */
+} __attribute__((packed));
+
 enum virtio_vsock_type {
 	VIRTIO_VSOCK_TYPE_STREAM = 1,
 };
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12  9:29 [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host jiangyiwen
  2018-12-12 15:37 ` Michael S. Tsirkin
@ 2018-12-12 15:37 ` Michael S. Tsirkin
  2018-12-13  3:08   ` jiangyiwen
  2018-12-12 19:09 ` David Miller
  2018-12-12 19:09 ` David Miller
  3 siblings, 1 reply; 27+ messages in thread
From: Michael S. Tsirkin @ 2018-12-12 15:37 UTC (permalink / raw)
  To: jiangyiwen; +Cc: Stefan Hajnoczi, Jason Wang, netdev, kvm, virtualization

On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> it will merge big packet into rx vq.
> 
> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>

I feel this approach jumps into making interface changes for
optimizations too quickly. For example, what prevents us
from taking a big buffer, prepending each chunk
with the header and writing it out without
host/guest interface changes?

This should allow optimizations such as vhost_add_used_n
batching.

I realize a header in each packet does have a cost,
but it also has advantages such as improved robustness,
I'd like to see more of an apples to apples comparison
of the performance gain from skipping them.


> ---
>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
>  include/linux/virtio_vsock.h      |   1 +
>  include/uapi/linux/virtio_vsock.h |   5 ++
>  3 files changed, 94 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 34bc3ab..dc52b0f 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -22,7 +22,8 @@
>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
> 
>  enum {
> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
>  };
> 
>  /* Used to track all the vhost_vsock instances on the system. */
> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  	return vsock;
>  }
> 
> +/* This segment of codes are copied from drivers/vhost/net.c */
> +static int get_rx_bufs(struct vhost_virtqueue *vq,
> +		struct vring_used_elem *heads, int datalen,
> +		unsigned *iovcount, unsigned int quota)
> +{
> +	unsigned int out, in;
> +	int seg = 0;
> +	int headcount = 0;
> +	unsigned d;
> +	int ret;
> +	/*
> +	 * len is always initialized before use since we are always called with
> +	 * datalen > 0.
> +	 */
> +	u32 uninitialized_var(len);
> +
> +	while (datalen > 0 && headcount < quota) {
> +		if (unlikely(seg >= UIO_MAXIOV)) {
> +			ret = -ENOBUFS;
> +			goto err;
> +		}
> +
> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
> +				ARRAY_SIZE(vq->iov) - seg, &out,
> +				&in, NULL, NULL);
> +		if (unlikely(ret < 0))
> +			goto err;
> +
> +		d = ret;
> +		if (d == vq->num) {
> +			ret = 0;
> +			goto err;
> +		}
> +
> +		if (unlikely(out || in <= 0)) {
> +			vq_err(vq, "unexpected descriptor format for RX: "
> +					"out %d, in %d\n", out, in);
> +			ret = -EINVAL;
> +			goto err;
> +		}
> +
> +		heads[headcount].id = cpu_to_vhost32(vq, d);
> +		len = iov_length(vq->iov + seg, in);
> +		heads[headcount].len = cpu_to_vhost32(vq, len);
> +		datalen -= len;
> +		++headcount;
> +		seg += in;
> +	}
> +
> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +	*iovcount = seg;
> +
> +	/* Detect overrun */
> +	if (unlikely(datalen > 0)) {
> +		ret = UIO_MAXIOV + 1;
> +		goto err;
> +	}
> +	return headcount;
> +err:
> +	vhost_discard_vq_desc(vq, headcount);
> +	return ret;
> +}
> +
>  static void
>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>  			    struct vhost_virtqueue *vq)
> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
>  	bool added = false;
>  	bool restart_tx = false;
> +	int mergeable;
> +	size_t vsock_hlen;
> 
>  	mutex_lock(&vq->mutex);
> 
>  	if (!vq->private_data)
>  		goto out;
> 
> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
> +	/*
> +	 * Guest fill page for rx vq in mergeable case, so it will not
> +	 * allocate pkt structure, we should reserve size of pkt in advance.
> +	 */
> +	if (likely(mergeable))
> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
> +	else
> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
> +
>  	/* Avoid further vmexits, we're already processing the virtqueue */
>  	vhost_disable_notify(&vsock->dev, vq);
> 
>  	for (;;) {
>  		struct virtio_vsock_pkt *pkt;
>  		struct iov_iter iov_iter;
> -		unsigned out, in;
> +		unsigned out = 0, in = 0;
>  		size_t nbytes;
>  		size_t len;
> -		int head;
> +		s16 headcount;
> 
>  		spin_lock_bh(&vsock->send_pkt_list_lock);
>  		if (list_empty(&vsock->send_pkt_list)) {
> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  		list_del_init(&pkt->list);
>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
> 
> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> -					 &out, &in, NULL, NULL);
> -		if (head < 0) {
> -			spin_lock_bh(&vsock->send_pkt_list_lock);
> -			list_add(&pkt->list, &vsock->send_pkt_list);
> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
> -			break;
> -		}
> -
> -		if (head == vq->num) {
> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
> +		if (headcount <= 0) {
>  			spin_lock_bh(&vsock->send_pkt_list_lock);
>  			list_add(&pkt->list, &vsock->send_pkt_list);
>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  			/* We cannot finish yet if more buffers snuck in while
>  			 * re-enabling notify.
>  			 */
> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>  				vhost_disable_notify(&vsock->dev, vq);
>  				continue;
>  			}
>  			break;
>  		}
> 
> -		if (out) {
> -			virtio_transport_free_pkt(pkt);
> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
> -			break;
> -		}
> -
>  		len = iov_length(&vq->iov[out], in);
>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> 
> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
> -		if (nbytes != sizeof(pkt->hdr)) {
> +		if (likely(mergeable))
> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
> +		if (nbytes != vsock_hlen) {
>  			virtio_transport_free_pkt(pkt);
>  			vq_err(vq, "Faulted on copying pkt hdr\n");
>  			break;
> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  			break;
>  		}
> 
> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
> +		vhost_add_used_n(vq, vq->heads, headcount);
>  		added = true;
> 
>  		if (pkt->reply) {
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index bf84418..da9e1fe 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
> 
>  struct virtio_vsock_pkt {
>  	struct virtio_vsock_hdr	hdr;
> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>  	struct work_struct work;
>  	struct list_head list;
>  	/* socket refcnt not held, only use for cancellation */
> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 1d57ed3..2292f30 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>  	__le32	fwd_cnt;
>  } __attribute__((packed));
> 
> +/* It add mergeable rx buffers feature */
> +struct virtio_vsock_mrg_rxbuf_hdr {
> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> +} __attribute__((packed));
> +
>  enum virtio_vsock_type {
>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>  };
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12  9:29 [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host jiangyiwen
@ 2018-12-12 15:37 ` Michael S. Tsirkin
  2018-12-12 15:37 ` Michael S. Tsirkin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2018-12-12 15:37 UTC (permalink / raw)
  To: jiangyiwen; +Cc: netdev, kvm, Stefan Hajnoczi, virtualization

On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> it will merge big packet into rx vq.
> 
> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>

I feel this approach jumps into making interface changes for
optimizations too quickly. For example, what prevents us
from taking a big buffer, prepending each chunk
with the header and writing it out without
host/guest interface changes?

This should allow optimizations such as vhost_add_used_n
batching.

I realize a header in each packet does have a cost,
but it also has advantages such as improved robustness,
I'd like to see more of an apples to apples comparison
of the performance gain from skipping them.


> ---
>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
>  include/linux/virtio_vsock.h      |   1 +
>  include/uapi/linux/virtio_vsock.h |   5 ++
>  3 files changed, 94 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 34bc3ab..dc52b0f 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -22,7 +22,8 @@
>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
> 
>  enum {
> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
>  };
> 
>  /* Used to track all the vhost_vsock instances on the system. */
> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  	return vsock;
>  }
> 
> +/* This segment of codes are copied from drivers/vhost/net.c */
> +static int get_rx_bufs(struct vhost_virtqueue *vq,
> +		struct vring_used_elem *heads, int datalen,
> +		unsigned *iovcount, unsigned int quota)
> +{
> +	unsigned int out, in;
> +	int seg = 0;
> +	int headcount = 0;
> +	unsigned d;
> +	int ret;
> +	/*
> +	 * len is always initialized before use since we are always called with
> +	 * datalen > 0.
> +	 */
> +	u32 uninitialized_var(len);
> +
> +	while (datalen > 0 && headcount < quota) {
> +		if (unlikely(seg >= UIO_MAXIOV)) {
> +			ret = -ENOBUFS;
> +			goto err;
> +		}
> +
> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
> +				ARRAY_SIZE(vq->iov) - seg, &out,
> +				&in, NULL, NULL);
> +		if (unlikely(ret < 0))
> +			goto err;
> +
> +		d = ret;
> +		if (d == vq->num) {
> +			ret = 0;
> +			goto err;
> +		}
> +
> +		if (unlikely(out || in <= 0)) {
> +			vq_err(vq, "unexpected descriptor format for RX: "
> +					"out %d, in %d\n", out, in);
> +			ret = -EINVAL;
> +			goto err;
> +		}
> +
> +		heads[headcount].id = cpu_to_vhost32(vq, d);
> +		len = iov_length(vq->iov + seg, in);
> +		heads[headcount].len = cpu_to_vhost32(vq, len);
> +		datalen -= len;
> +		++headcount;
> +		seg += in;
> +	}
> +
> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +	*iovcount = seg;
> +
> +	/* Detect overrun */
> +	if (unlikely(datalen > 0)) {
> +		ret = UIO_MAXIOV + 1;
> +		goto err;
> +	}
> +	return headcount;
> +err:
> +	vhost_discard_vq_desc(vq, headcount);
> +	return ret;
> +}
> +
>  static void
>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>  			    struct vhost_virtqueue *vq)
> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
>  	bool added = false;
>  	bool restart_tx = false;
> +	int mergeable;
> +	size_t vsock_hlen;
> 
>  	mutex_lock(&vq->mutex);
> 
>  	if (!vq->private_data)
>  		goto out;
> 
> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
> +	/*
> +	 * Guest fill page for rx vq in mergeable case, so it will not
> +	 * allocate pkt structure, we should reserve size of pkt in advance.
> +	 */
> +	if (likely(mergeable))
> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
> +	else
> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
> +
>  	/* Avoid further vmexits, we're already processing the virtqueue */
>  	vhost_disable_notify(&vsock->dev, vq);
> 
>  	for (;;) {
>  		struct virtio_vsock_pkt *pkt;
>  		struct iov_iter iov_iter;
> -		unsigned out, in;
> +		unsigned out = 0, in = 0;
>  		size_t nbytes;
>  		size_t len;
> -		int head;
> +		s16 headcount;
> 
>  		spin_lock_bh(&vsock->send_pkt_list_lock);
>  		if (list_empty(&vsock->send_pkt_list)) {
> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  		list_del_init(&pkt->list);
>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
> 
> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> -					 &out, &in, NULL, NULL);
> -		if (head < 0) {
> -			spin_lock_bh(&vsock->send_pkt_list_lock);
> -			list_add(&pkt->list, &vsock->send_pkt_list);
> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
> -			break;
> -		}
> -
> -		if (head == vq->num) {
> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
> +		if (headcount <= 0) {
>  			spin_lock_bh(&vsock->send_pkt_list_lock);
>  			list_add(&pkt->list, &vsock->send_pkt_list);
>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  			/* We cannot finish yet if more buffers snuck in while
>  			 * re-enabling notify.
>  			 */
> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>  				vhost_disable_notify(&vsock->dev, vq);
>  				continue;
>  			}
>  			break;
>  		}
> 
> -		if (out) {
> -			virtio_transport_free_pkt(pkt);
> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
> -			break;
> -		}
> -
>  		len = iov_length(&vq->iov[out], in);
>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> 
> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
> -		if (nbytes != sizeof(pkt->hdr)) {
> +		if (likely(mergeable))
> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
> +		if (nbytes != vsock_hlen) {
>  			virtio_transport_free_pkt(pkt);
>  			vq_err(vq, "Faulted on copying pkt hdr\n");
>  			break;
> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  			break;
>  		}
> 
> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
> +		vhost_add_used_n(vq, vq->heads, headcount);
>  		added = true;
> 
>  		if (pkt->reply) {
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index bf84418..da9e1fe 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
> 
>  struct virtio_vsock_pkt {
>  	struct virtio_vsock_hdr	hdr;
> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>  	struct work_struct work;
>  	struct list_head list;
>  	/* socket refcnt not held, only use for cancellation */
> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 1d57ed3..2292f30 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>  	__le32	fwd_cnt;
>  } __attribute__((packed));
> 
> +/* It add mergeable rx buffers feature */
> +struct virtio_vsock_mrg_rxbuf_hdr {
> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> +} __attribute__((packed));
> +
>  enum virtio_vsock_type {
>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>  };
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12  9:29 [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host jiangyiwen
                   ` (2 preceding siblings ...)
  2018-12-12 19:09 ` David Miller
@ 2018-12-12 19:09 ` David Miller
  2018-12-13  3:11   ` jiangyiwen
  2018-12-13  3:11   ` jiangyiwen
  3 siblings, 2 replies; 27+ messages in thread
From: David Miller @ 2018-12-12 19:09 UTC (permalink / raw)
  To: jiangyiwen; +Cc: stefanha, mst, jasowang, netdev, kvm, virtualization

From: jiangyiwen <jiangyiwen@huawei.com>
Date: Wed, 12 Dec 2018 17:29:31 +0800

> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 1d57ed3..2292f30 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>  	__le32	fwd_cnt;
>  } __attribute__((packed));
> 
> +/* It add mergeable rx buffers feature */
> +struct virtio_vsock_mrg_rxbuf_hdr {
> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> +} __attribute__((packed));
> +

I know the rest of this file uses 'packed' but this attribute should
only be used if absolutely necessary as it incurs a
non-trivial performance penalty for some architectures.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12  9:29 [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host jiangyiwen
  2018-12-12 15:37 ` Michael S. Tsirkin
  2018-12-12 15:37 ` Michael S. Tsirkin
@ 2018-12-12 19:09 ` David Miller
  2018-12-12 19:09 ` David Miller
  3 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2018-12-12 19:09 UTC (permalink / raw)
  To: jiangyiwen; +Cc: kvm, mst, netdev, virtualization, stefanha

From: jiangyiwen <jiangyiwen@huawei.com>
Date: Wed, 12 Dec 2018 17:29:31 +0800

> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 1d57ed3..2292f30 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>  	__le32	fwd_cnt;
>  } __attribute__((packed));
> 
> +/* It add mergeable rx buffers feature */
> +struct virtio_vsock_mrg_rxbuf_hdr {
> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> +} __attribute__((packed));
> +

I know the rest of this file uses 'packed' but this attribute should
only be used if absolutely necessary as it incurs a
non-trivial performance penalty for some architectures.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12 15:37 ` Michael S. Tsirkin
@ 2018-12-13  3:08   ` jiangyiwen
  2018-12-13 14:48     ` Michael S. Tsirkin
                       ` (3 more replies)
  0 siblings, 4 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-13  3:08 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, kvm, Stefan Hajnoczi, virtualization

On 2018/12/12 23:37, Michael S. Tsirkin wrote:
> On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
>> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
>> it will merge big packet into rx vq.
>>
>> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
> 
> I feel this approach jumps into making interface changes for
> optimizations too quickly. For example, what prevents us
> from taking a big buffer, prepending each chunk
> with the header and writing it out without
> host/guest interface changes?
> 
> This should allow optimizations such as vhost_add_used_n
> batching.
> 
> I realize a header in each packet does have a cost,
> but it also has advantages such as improved robustness,
> I'd like to see more of an apples to apples comparison
> of the performance gain from skipping them.
> 
> 

Hi Michael,

I don't fully understand what you mean. Do you want to
see a performance comparison between the previous code and
a version that only uses batching?

In my opinion, the guest doesn't fill big buffers into the rx vq
in order to balance performance against guest memory pressure, and
adding the mergeable feature can improve big-packet performance.
As for small packets, I am still trying to find out the reason; it
may be fluctuation in the test results, or it may be that in
mergeable mode, when the host sends a 4k packet to the guest, we
have to call vhost_get_vq_desc() twice in the host (hdr + 4k data),
and in the guest we also have to call virtqueue_get_buf() twice.

Thanks,
Yiwen.

>> ---
>>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
>>  include/linux/virtio_vsock.h      |   1 +
>>  include/uapi/linux/virtio_vsock.h |   5 ++
>>  3 files changed, 94 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index 34bc3ab..dc52b0f 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -22,7 +22,8 @@
>>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
>>
>>  enum {
>> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
>> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
>> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
>>  };
>>
>>  /* Used to track all the vhost_vsock instances on the system. */
>> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>  	return vsock;
>>  }
>>
>> +/* This segment of codes are copied from drivers/vhost/net.c */
>> +static int get_rx_bufs(struct vhost_virtqueue *vq,
>> +		struct vring_used_elem *heads, int datalen,
>> +		unsigned *iovcount, unsigned int quota)
>> +{
>> +	unsigned int out, in;
>> +	int seg = 0;
>> +	int headcount = 0;
>> +	unsigned d;
>> +	int ret;
>> +	/*
>> +	 * len is always initialized before use since we are always called with
>> +	 * datalen > 0.
>> +	 */
>> +	u32 uninitialized_var(len);
>> +
>> +	while (datalen > 0 && headcount < quota) {
>> +		if (unlikely(seg >= UIO_MAXIOV)) {
>> +			ret = -ENOBUFS;
>> +			goto err;
>> +		}
>> +
>> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
>> +				ARRAY_SIZE(vq->iov) - seg, &out,
>> +				&in, NULL, NULL);
>> +		if (unlikely(ret < 0))
>> +			goto err;
>> +
>> +		d = ret;
>> +		if (d == vq->num) {
>> +			ret = 0;
>> +			goto err;
>> +		}
>> +
>> +		if (unlikely(out || in <= 0)) {
>> +			vq_err(vq, "unexpected descriptor format for RX: "
>> +					"out %d, in %d\n", out, in);
>> +			ret = -EINVAL;
>> +			goto err;
>> +		}
>> +
>> +		heads[headcount].id = cpu_to_vhost32(vq, d);
>> +		len = iov_length(vq->iov + seg, in);
>> +		heads[headcount].len = cpu_to_vhost32(vq, len);
>> +		datalen -= len;
>> +		++headcount;
>> +		seg += in;
>> +	}
>> +
>> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
>> +	*iovcount = seg;
>> +
>> +	/* Detect overrun */
>> +	if (unlikely(datalen > 0)) {
>> +		ret = UIO_MAXIOV + 1;
>> +		goto err;
>> +	}
>> +	return headcount;
>> +err:
>> +	vhost_discard_vq_desc(vq, headcount);
>> +	return ret;
>> +}
>> +
>>  static void
>>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>>  			    struct vhost_virtqueue *vq)
>> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
>>  	bool added = false;
>>  	bool restart_tx = false;
>> +	int mergeable;
>> +	size_t vsock_hlen;
>>
>>  	mutex_lock(&vq->mutex);
>>
>>  	if (!vq->private_data)
>>  		goto out;
>>
>> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
>> +	/*
>> +	 * Guest fill page for rx vq in mergeable case, so it will not
>> +	 * allocate pkt structure, we should reserve size of pkt in advance.
>> +	 */
>> +	if (likely(mergeable))
>> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
>> +	else
>> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
>> +
>>  	/* Avoid further vmexits, we're already processing the virtqueue */
>>  	vhost_disable_notify(&vsock->dev, vq);
>>
>>  	for (;;) {
>>  		struct virtio_vsock_pkt *pkt;
>>  		struct iov_iter iov_iter;
>> -		unsigned out, in;
>> +		unsigned out = 0, in = 0;
>>  		size_t nbytes;
>>  		size_t len;
>> -		int head;
>> +		s16 headcount;
>>
>>  		spin_lock_bh(&vsock->send_pkt_list_lock);
>>  		if (list_empty(&vsock->send_pkt_list)) {
>> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>  		list_del_init(&pkt->list);
>>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
>>
>> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>> -					 &out, &in, NULL, NULL);
>> -		if (head < 0) {
>> -			spin_lock_bh(&vsock->send_pkt_list_lock);
>> -			list_add(&pkt->list, &vsock->send_pkt_list);
>> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
>> -			break;
>> -		}
>> -
>> -		if (head == vq->num) {
>> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
>> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
>> +		if (headcount <= 0) {
>>  			spin_lock_bh(&vsock->send_pkt_list_lock);
>>  			list_add(&pkt->list, &vsock->send_pkt_list);
>>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
>> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>  			/* We cannot finish yet if more buffers snuck in while
>>  			 * re-enabling notify.
>>  			 */
>> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>>  				vhost_disable_notify(&vsock->dev, vq);
>>  				continue;
>>  			}
>>  			break;
>>  		}
>>
>> -		if (out) {
>> -			virtio_transport_free_pkt(pkt);
>> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
>> -			break;
>> -		}
>> -
>>  		len = iov_length(&vq->iov[out], in);
>>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
>>
>> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
>> -		if (nbytes != sizeof(pkt->hdr)) {
>> +		if (likely(mergeable))
>> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
>> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
>> +		if (nbytes != vsock_hlen) {
>>  			virtio_transport_free_pkt(pkt);
>>  			vq_err(vq, "Faulted on copying pkt hdr\n");
>>  			break;
>> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>  			break;
>>  		}
>>
>> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
>> +		vhost_add_used_n(vq, vq->heads, headcount);
>>  		added = true;
>>
>>  		if (pkt->reply) {
>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>> index bf84418..da9e1fe 100644
>> --- a/include/linux/virtio_vsock.h
>> +++ b/include/linux/virtio_vsock.h
>> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
>>
>>  struct virtio_vsock_pkt {
>>  	struct virtio_vsock_hdr	hdr;
>> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>>  	struct work_struct work;
>>  	struct list_head list;
>>  	/* socket refcnt not held, only use for cancellation */
>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>> index 1d57ed3..2292f30 100644
>> --- a/include/uapi/linux/virtio_vsock.h
>> +++ b/include/uapi/linux/virtio_vsock.h
>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>  	__le32	fwd_cnt;
>>  } __attribute__((packed));
>>
>> +/* It add mergeable rx buffers feature */
>> +struct virtio_vsock_mrg_rxbuf_hdr {
>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>> +} __attribute__((packed));
>> +
>>  enum virtio_vsock_type {
>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>>  };
>> -- 
>> 1.8.3.1
>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12 19:09 ` David Miller
  2018-12-13  3:11   ` jiangyiwen
@ 2018-12-13  3:11   ` jiangyiwen
  2018-12-13  5:59     ` David Miller
                       ` (3 more replies)
  1 sibling, 4 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-13  3:11 UTC (permalink / raw)
  To: David Miller; +Cc: stefanha, mst, jasowang, netdev, kvm, virtualization

On 2018/12/13 3:09, David Miller wrote:
> From: jiangyiwen <jiangyiwen@huawei.com>
> Date: Wed, 12 Dec 2018 17:29:31 +0800
> 
>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>> index 1d57ed3..2292f30 100644
>> --- a/include/uapi/linux/virtio_vsock.h
>> +++ b/include/uapi/linux/virtio_vsock.h
>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>  	__le32	fwd_cnt;
>>  } __attribute__((packed));
>>
>> +/* It add mergeable rx buffers feature */
>> +struct virtio_vsock_mrg_rxbuf_hdr {
>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>> +} __attribute__((packed));
>> +
> 
> I know the rest of this file uses 'packed' but this attribute should
> only be used if absolutely necessary as it incurs a
> non-trivial performance penalty for some architectures.
> 
> .
> 

Hi David,

I hope Host can fill fewer bytes into rx virtqueue, so
I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
alignment.

Thanks,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-12 19:09 ` David Miller
@ 2018-12-13  3:11   ` jiangyiwen
  2018-12-13  3:11   ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-13  3:11 UTC (permalink / raw)
  To: David Miller; +Cc: kvm, mst, netdev, virtualization, stefanha

On 2018/12/13 3:09, David Miller wrote:
> From: jiangyiwen <jiangyiwen@huawei.com>
> Date: Wed, 12 Dec 2018 17:29:31 +0800
> 
>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>> index 1d57ed3..2292f30 100644
>> --- a/include/uapi/linux/virtio_vsock.h
>> +++ b/include/uapi/linux/virtio_vsock.h
>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>  	__le32	fwd_cnt;
>>  } __attribute__((packed));
>>
>> +/* It add mergeable rx buffers feature */
>> +struct virtio_vsock_mrg_rxbuf_hdr {
>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>> +} __attribute__((packed));
>> +
> 
> I know the rest of this file uses 'packed' but this attribute should
> only be used if absolutely necessary as it incurs a
> non-trivial performance penalty for some architectures.
> 
> .
> 

Hi David,

I hope Host can fill fewer bytes into rx virtqueue, so
I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
alignment.

Thanks,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:11   ` jiangyiwen
  2018-12-13  5:59     ` David Miller
@ 2018-12-13  5:59     ` David Miller
  2018-12-13  7:42       ` jiangyiwen
  2018-12-13  7:42       ` jiangyiwen
  2018-12-13 14:50     ` Michael S. Tsirkin
  2018-12-13 14:50     ` Michael S. Tsirkin
  3 siblings, 2 replies; 27+ messages in thread
From: David Miller @ 2018-12-13  5:59 UTC (permalink / raw)
  To: jiangyiwen; +Cc: stefanha, mst, jasowang, netdev, kvm, virtualization

From: jiangyiwen <jiangyiwen@huawei.com>
Date: Thu, 13 Dec 2018 11:11:48 +0800

> I hope Host can fill fewer bytes into rx virtqueue, so
> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
> alignment.

The question is whether this actually matters.

Do you know?

If the object this is embedded inside of is at least 2 byte aligned,
you are marking it packed for nothing.

There are only 100% downsides to using the packed attribute.

Simply define your data structures properly, with fixed sized types,
and all padding defined explicitly.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:11   ` jiangyiwen
@ 2018-12-13  5:59     ` David Miller
  2018-12-13  5:59     ` David Miller
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2018-12-13  5:59 UTC (permalink / raw)
  To: jiangyiwen; +Cc: kvm, mst, netdev, virtualization, stefanha

From: jiangyiwen <jiangyiwen@huawei.com>
Date: Thu, 13 Dec 2018 11:11:48 +0800

> I hope Host can fill fewer bytes into rx virtqueue, so
> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
> alignment.

The question is whether this actually matters.

Do you know?

If the object this is embedded inside of is at least 2 byte aligned,
you are marking it packed for nothing.

There are only 100% downsides to using the packed attribute.

Simply define your data structures properly, with fixed sized types,
and all padding defined explicitly.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  5:59     ` David Miller
  2018-12-13  7:42       ` jiangyiwen
@ 2018-12-13  7:42       ` jiangyiwen
  2018-12-13 15:46         ` Stefan Hajnoczi
  2018-12-13 15:46         ` Stefan Hajnoczi
  1 sibling, 2 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-13  7:42 UTC (permalink / raw)
  To: David Miller; +Cc: stefanha, mst, jasowang, netdev, kvm, virtualization

On 2018/12/13 13:59, David Miller wrote:
> From: jiangyiwen <jiangyiwen@huawei.com>
> Date: Thu, 13 Dec 2018 11:11:48 +0800
> 
>> I hope Host can fill fewer bytes into rx virtqueue, so
>> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
>> alignment.
> 
> The question is whether this actually matters.
> 
> Do you know?
> 
> If the object this is embedded inside of is at least 2 byte aligned,
> you are marking it packed for nothing.
> 
> There are only 100% downsides to using the packed attribute.
> 
> Simply define your data structures properly, with fixed sized types,
> and all padding defined explicitly.
> 
> .
> 

Hi David,

Thanks a lot. I need to send the number of buffers from Host to Guest, so I
think we need to keep the structure size the same between host and guest.
But after your reminder, I realize my code may have a serious problem:
in mergeable mode, I send the entire virtio_vsock_pkt structure
from Host to Guest; however, this structure's size may differ
under different compilers (Guest and Host are different). Then, Guest
may parse the wrong packet length.

David, I want to ask if there is such a problem?

In addition, why do I send the total virtio_vsock_pkt structure from Host to Guest?
- In order to avoid allocating virtio_vsock_pkt memory when receiving
  packets; in case of insufficient memory, it may have some advantages, and
  we can keep consistent with the old version.

Thanks again,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  5:59     ` David Miller
@ 2018-12-13  7:42       ` jiangyiwen
  2018-12-13  7:42       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-13  7:42 UTC (permalink / raw)
  To: David Miller; +Cc: kvm, mst, netdev, virtualization, stefanha

On 2018/12/13 13:59, David Miller wrote:
> From: jiangyiwen <jiangyiwen@huawei.com>
> Date: Thu, 13 Dec 2018 11:11:48 +0800
> 
>> I hope Host can fill fewer bytes into rx virtqueue, so
>> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
>> alignment.
> 
> The question is whether this actually matters.
> 
> Do you know?
> 
> If the object this is embedded inside of is at least 2 byte aligned,
> you are marking it packed for nothing.
> 
> There are only 100% downsides to using the packed attribute.
> 
> Simply define your data structures properly, with fixed sized types,
> and all padding defined explicitly.
> 
> .
> 

Hi David,

Thanks a lot. I need to send the number of buffers from Host to Guest, so I
think we need to keep the structure size the same between host and guest.
But after your reminder, I realize my code may have a serious problem:
in mergeable mode, I send the entire virtio_vsock_pkt structure
from Host to Guest; however, this structure's size may differ
under different compilers (Guest and Host are different). Then, Guest
may parse the wrong packet length.

David, I want to ask if there is such a problem?

In addition, why do I send the total virtio_vsock_pkt structure from Host to Guest?
- In order to avoid allocating virtio_vsock_pkt memory when receiving
  packets; in case of insufficient memory, it may have some advantages, and
  we can keep consistent with the old version.

Thanks again,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:08   ` jiangyiwen
  2018-12-13 14:48     ` Michael S. Tsirkin
@ 2018-12-13 14:48     ` Michael S. Tsirkin
  2018-12-14  7:41       ` jiangyiwen
  2018-12-14  7:41       ` jiangyiwen
  2018-12-13 15:49     ` Stefan Hajnoczi
  2018-12-13 15:49     ` Stefan Hajnoczi
  3 siblings, 2 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2018-12-13 14:48 UTC (permalink / raw)
  To: jiangyiwen; +Cc: Stefan Hajnoczi, Jason Wang, netdev, kvm, virtualization

On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
> > On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> >> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> >> it will merge big packet into rx vq.
> >>
> >> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
> > 
> > I feel this approach jumps into making interface changes for
> > optimizations too quickly. For example, what prevents us
> > from taking a big buffer, prepending each chunk
> > with the header and writing it out without
> > host/guest interface changes?
> > 
> > This should allow optimizations such as vhost_add_used_n
> > batching.
> > 
> > I realize a header in each packet does have a cost,
> > but it also has advantages such as improved robustness,
> > I'd like to see more of an apples to apples comparison
> > of the performance gain from skipping them.
> > 
> > 
> 
> Hi Michael,
> 
> I don't fully understand what you mean, do you want to
> see a performance comparison that before performance and
> only use batching?
> 
> In my opinion, guest don't fill big buffer in rx vq because
> the balance performance and guest memory pressure, add
> mergeable feature can improve big packets performance,
> as for small packets, I try to find out the reason, may be
> the fluctuation of test results, or in mergeable mode, when
> Host send a 4k packet to Guest, we should call vhost_get_vq_desc()
> twice in host(hdr + 4k data), and in guest we also should call
> virtqueue_get_buf() twice.
> 
> Thanks,
> Yiwen.

What I mean is that at least some of the gain here is because
larger skbs are passed up the stack.

You do not need larger packets to build larger skbs though.
Just check that the addresses match and you can combine
multiple fragments in a single skb.



> >> ---
> >>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
> >>  include/linux/virtio_vsock.h      |   1 +
> >>  include/uapi/linux/virtio_vsock.h |   5 ++
> >>  3 files changed, 94 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> index 34bc3ab..dc52b0f 100644
> >> --- a/drivers/vhost/vsock.c
> >> +++ b/drivers/vhost/vsock.c
> >> @@ -22,7 +22,8 @@
> >>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
> >>
> >>  enum {
> >> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
> >> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> >> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
> >>  };
> >>
> >>  /* Used to track all the vhost_vsock instances on the system. */
> >> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  	return vsock;
> >>  }
> >>
> >> +/* This segment of codes are copied from drivers/vhost/net.c */
> >> +static int get_rx_bufs(struct vhost_virtqueue *vq,
> >> +		struct vring_used_elem *heads, int datalen,
> >> +		unsigned *iovcount, unsigned int quota)
> >> +{
> >> +	unsigned int out, in;
> >> +	int seg = 0;
> >> +	int headcount = 0;
> >> +	unsigned d;
> >> +	int ret;
> >> +	/*
> >> +	 * len is always initialized before use since we are always called with
> >> +	 * datalen > 0.
> >> +	 */
> >> +	u32 uninitialized_var(len);
> >> +
> >> +	while (datalen > 0 && headcount < quota) {
> >> +		if (unlikely(seg >= UIO_MAXIOV)) {
> >> +			ret = -ENOBUFS;
> >> +			goto err;
> >> +		}
> >> +
> >> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
> >> +				ARRAY_SIZE(vq->iov) - seg, &out,
> >> +				&in, NULL, NULL);
> >> +		if (unlikely(ret < 0))
> >> +			goto err;
> >> +
> >> +		d = ret;
> >> +		if (d == vq->num) {
> >> +			ret = 0;
> >> +			goto err;
> >> +		}
> >> +
> >> +		if (unlikely(out || in <= 0)) {
> >> +			vq_err(vq, "unexpected descriptor format for RX: "
> >> +					"out %d, in %d\n", out, in);
> >> +			ret = -EINVAL;
> >> +			goto err;
> >> +		}
> >> +
> >> +		heads[headcount].id = cpu_to_vhost32(vq, d);
> >> +		len = iov_length(vq->iov + seg, in);
> >> +		heads[headcount].len = cpu_to_vhost32(vq, len);
> >> +		datalen -= len;
> >> +		++headcount;
> >> +		seg += in;
> >> +	}
> >> +
> >> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> >> +	*iovcount = seg;
> >> +
> >> +	/* Detect overrun */
> >> +	if (unlikely(datalen > 0)) {
> >> +		ret = UIO_MAXIOV + 1;
> >> +		goto err;
> >> +	}
> >> +	return headcount;
> >> +err:
> >> +	vhost_discard_vq_desc(vq, headcount);
> >> +	return ret;
> >> +}
> >> +
> >>  static void
> >>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
> >>  			    struct vhost_virtqueue *vq)
> >> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
> >>  	bool added = false;
> >>  	bool restart_tx = false;
> >> +	int mergeable;
> >> +	size_t vsock_hlen;
> >>
> >>  	mutex_lock(&vq->mutex);
> >>
> >>  	if (!vq->private_data)
> >>  		goto out;
> >>
> >> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
> >> +	/*
> >> +	 * Guest fill page for rx vq in mergeable case, so it will not
> >> +	 * allocate pkt structure, we should reserve size of pkt in advance.
> >> +	 */
> >> +	if (likely(mergeable))
> >> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
> >> +	else
> >> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
> >> +
> >>  	/* Avoid further vmexits, we're already processing the virtqueue */
> >>  	vhost_disable_notify(&vsock->dev, vq);
> >>
> >>  	for (;;) {
> >>  		struct virtio_vsock_pkt *pkt;
> >>  		struct iov_iter iov_iter;
> >> -		unsigned out, in;
> >> +		unsigned out = 0, in = 0;
> >>  		size_t nbytes;
> >>  		size_t len;
> >> -		int head;
> >> +		s16 headcount;
> >>
> >>  		spin_lock_bh(&vsock->send_pkt_list_lock);
> >>  		if (list_empty(&vsock->send_pkt_list)) {
> >> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  		list_del_init(&pkt->list);
> >>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
> >>
> >> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> >> -					 &out, &in, NULL, NULL);
> >> -		if (head < 0) {
> >> -			spin_lock_bh(&vsock->send_pkt_list_lock);
> >> -			list_add(&pkt->list, &vsock->send_pkt_list);
> >> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
> >> -			break;
> >> -		}
> >> -
> >> -		if (head == vq->num) {
> >> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
> >> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
> >> +		if (headcount <= 0) {
> >>  			spin_lock_bh(&vsock->send_pkt_list_lock);
> >>  			list_add(&pkt->list, &vsock->send_pkt_list);
> >>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
> >> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  			/* We cannot finish yet if more buffers snuck in while
> >>  			 * re-enabling notify.
> >>  			 */
> >> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> >> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> >>  				vhost_disable_notify(&vsock->dev, vq);
> >>  				continue;
> >>  			}
> >>  			break;
> >>  		}
> >>
> >> -		if (out) {
> >> -			virtio_transport_free_pkt(pkt);
> >> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
> >> -			break;
> >> -		}
> >> -
> >>  		len = iov_length(&vq->iov[out], in);
> >>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> >>
> >> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
> >> -		if (nbytes != sizeof(pkt->hdr)) {
> >> +		if (likely(mergeable))
> >> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
> >> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
> >> +		if (nbytes != vsock_hlen) {
> >>  			virtio_transport_free_pkt(pkt);
> >>  			vq_err(vq, "Faulted on copying pkt hdr\n");
> >>  			break;
> >> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  			break;
> >>  		}
> >>
> >> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
> >> +		vhost_add_used_n(vq, vq->heads, headcount);
> >>  		added = true;
> >>
> >>  		if (pkt->reply) {
> >> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> >> index bf84418..da9e1fe 100644
> >> --- a/include/linux/virtio_vsock.h
> >> +++ b/include/linux/virtio_vsock.h
> >> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
> >>
> >>  struct virtio_vsock_pkt {
> >>  	struct virtio_vsock_hdr	hdr;
> >> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
> >>  	struct work_struct work;
> >>  	struct list_head list;
> >>  	/* socket refcnt not held, only use for cancellation */
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 1d57ed3..2292f30 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
> >>  	__le32	fwd_cnt;
> >>  } __attribute__((packed));
> >>
> >> +/* It add mergeable rx buffers feature */
> >> +struct virtio_vsock_mrg_rxbuf_hdr {
> >> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> >> +} __attribute__((packed));
> >> +
> >>  enum virtio_vsock_type {
> >>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> >>  };
> >> -- 
> >> 1.8.3.1
> >>
> > 
> > .
> > 
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:08   ` jiangyiwen
@ 2018-12-13 14:48     ` Michael S. Tsirkin
  2018-12-13 14:48     ` Michael S. Tsirkin
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2018-12-13 14:48 UTC (permalink / raw)
  To: jiangyiwen; +Cc: netdev, kvm, Stefan Hajnoczi, virtualization

On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
> > On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> >> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> >> it will merge big packet into rx vq.
> >>
> >> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
> > 
> > I feel this approach jumps into making interface changes for
> > optimizations too quickly. For example, what prevents us
> > from taking a big buffer, prepending each chunk
> > with the header and writing it out without
> > host/guest interface changes?
> > 
> > This should allow optimizations such as vhost_add_used_n
> > batching.
> > 
> > I realize a header in each packet does have a cost,
> > but it also has advantages such as improved robustness,
> > I'd like to see more of an apples to apples comparison
> > of the performance gain from skipping them.
> > 
> > 
> 
> Hi Michael,
> 
> I don't fully understand what you mean, do you want to
> see a performance comparison that before performance and
> only use batching?
> 
> In my opinion, guest don't fill big buffer in rx vq because
> the balance performance and guest memory pressure, add
> mergeable feature can improve big packets performance,
> as for small packets, I try to find out the reason, may be
> the fluctuation of test results, or in mergeable mode, when
> Host send a 4k packet to Guest, we should call vhost_get_vq_desc()
> twice in host(hdr + 4k data), and in guest we also should call
> virtqueue_get_buf() twice.
> 
> Thanks,
> Yiwen.

What I mean is that at least some of the gain here is because
larger skbs are passed up the stack.

You do not need larger packets to build larger skbs though.
Just check that the addresses match and you can combine
multiple fragments in a single skb.



> >> ---
> >>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
> >>  include/linux/virtio_vsock.h      |   1 +
> >>  include/uapi/linux/virtio_vsock.h |   5 ++
> >>  3 files changed, 94 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> index 34bc3ab..dc52b0f 100644
> >> --- a/drivers/vhost/vsock.c
> >> +++ b/drivers/vhost/vsock.c
> >> @@ -22,7 +22,8 @@
> >>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
> >>
> >>  enum {
> >> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
> >> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> >> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
> >>  };
> >>
> >>  /* Used to track all the vhost_vsock instances on the system. */
> >> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  	return vsock;
> >>  }
> >>
> >> +/* This segment of codes are copied from drivers/vhost/net.c */
> >> +static int get_rx_bufs(struct vhost_virtqueue *vq,
> >> +		struct vring_used_elem *heads, int datalen,
> >> +		unsigned *iovcount, unsigned int quota)
> >> +{
> >> +	unsigned int out, in;
> >> +	int seg = 0;
> >> +	int headcount = 0;
> >> +	unsigned d;
> >> +	int ret;
> >> +	/*
> >> +	 * len is always initialized before use since we are always called with
> >> +	 * datalen > 0.
> >> +	 */
> >> +	u32 uninitialized_var(len);
> >> +
> >> +	while (datalen > 0 && headcount < quota) {
> >> +		if (unlikely(seg >= UIO_MAXIOV)) {
> >> +			ret = -ENOBUFS;
> >> +			goto err;
> >> +		}
> >> +
> >> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
> >> +				ARRAY_SIZE(vq->iov) - seg, &out,
> >> +				&in, NULL, NULL);
> >> +		if (unlikely(ret < 0))
> >> +			goto err;
> >> +
> >> +		d = ret;
> >> +		if (d == vq->num) {
> >> +			ret = 0;
> >> +			goto err;
> >> +		}
> >> +
> >> +		if (unlikely(out || in <= 0)) {
> >> +			vq_err(vq, "unexpected descriptor format for RX: "
> >> +					"out %d, in %d\n", out, in);
> >> +			ret = -EINVAL;
> >> +			goto err;
> >> +		}
> >> +
> >> +		heads[headcount].id = cpu_to_vhost32(vq, d);
> >> +		len = iov_length(vq->iov + seg, in);
> >> +		heads[headcount].len = cpu_to_vhost32(vq, len);
> >> +		datalen -= len;
> >> +		++headcount;
> >> +		seg += in;
> >> +	}
> >> +
> >> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> >> +	*iovcount = seg;
> >> +
> >> +	/* Detect overrun */
> >> +	if (unlikely(datalen > 0)) {
> >> +		ret = UIO_MAXIOV + 1;
> >> +		goto err;
> >> +	}
> >> +	return headcount;
> >> +err:
> >> +	vhost_discard_vq_desc(vq, headcount);
> >> +	return ret;
> >> +}
> >> +
> >>  static void
> >>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
> >>  			    struct vhost_virtqueue *vq)
> >> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
> >>  	bool added = false;
> >>  	bool restart_tx = false;
> >> +	int mergeable;
> >> +	size_t vsock_hlen;
> >>
> >>  	mutex_lock(&vq->mutex);
> >>
> >>  	if (!vq->private_data)
> >>  		goto out;
> >>
> >> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
> >> +	/*
> >> +	 * Guest fill page for rx vq in mergeable case, so it will not
> >> +	 * allocate pkt structure, we should reserve size of pkt in advance.
> >> +	 */
> >> +	if (likely(mergeable))
> >> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
> >> +	else
> >> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
> >> +
> >>  	/* Avoid further vmexits, we're already processing the virtqueue */
> >>  	vhost_disable_notify(&vsock->dev, vq);
> >>
> >>  	for (;;) {
> >>  		struct virtio_vsock_pkt *pkt;
> >>  		struct iov_iter iov_iter;
> >> -		unsigned out, in;
> >> +		unsigned out = 0, in = 0;
> >>  		size_t nbytes;
> >>  		size_t len;
> >> -		int head;
> >> +		s16 headcount;
> >>
> >>  		spin_lock_bh(&vsock->send_pkt_list_lock);
> >>  		if (list_empty(&vsock->send_pkt_list)) {
> >> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  		list_del_init(&pkt->list);
> >>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
> >>
> >> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> >> -					 &out, &in, NULL, NULL);
> >> -		if (head < 0) {
> >> -			spin_lock_bh(&vsock->send_pkt_list_lock);
> >> -			list_add(&pkt->list, &vsock->send_pkt_list);
> >> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
> >> -			break;
> >> -		}
> >> -
> >> -		if (head == vq->num) {
> >> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
> >> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
> >> +		if (headcount <= 0) {
> >>  			spin_lock_bh(&vsock->send_pkt_list_lock);
> >>  			list_add(&pkt->list, &vsock->send_pkt_list);
> >>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
> >> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  			/* We cannot finish yet if more buffers snuck in while
> >>  			 * re-enabling notify.
> >>  			 */
> >> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> >> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> >>  				vhost_disable_notify(&vsock->dev, vq);
> >>  				continue;
> >>  			}
> >>  			break;
> >>  		}
> >>
> >> -		if (out) {
> >> -			virtio_transport_free_pkt(pkt);
> >> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
> >> -			break;
> >> -		}
> >> -
> >>  		len = iov_length(&vq->iov[out], in);
> >>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> >>
> >> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
> >> -		if (nbytes != sizeof(pkt->hdr)) {
> >> +		if (likely(mergeable))
> >> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
> >> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
> >> +		if (nbytes != vsock_hlen) {
> >>  			virtio_transport_free_pkt(pkt);
> >>  			vq_err(vq, "Faulted on copying pkt hdr\n");
> >>  			break;
> >> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >>  			break;
> >>  		}
> >>
> >> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
> >> +		vhost_add_used_n(vq, vq->heads, headcount);
> >>  		added = true;
> >>
> >>  		if (pkt->reply) {
> >> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> >> index bf84418..da9e1fe 100644
> >> --- a/include/linux/virtio_vsock.h
> >> +++ b/include/linux/virtio_vsock.h
> >> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
> >>
> >>  struct virtio_vsock_pkt {
> >>  	struct virtio_vsock_hdr	hdr;
> >> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
> >>  	struct work_struct work;
> >>  	struct list_head list;
> >>  	/* socket refcnt not held, only use for cancellation */
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 1d57ed3..2292f30 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
> >>  	__le32	fwd_cnt;
> >>  } __attribute__((packed));
> >>
> >> +/* It add mergeable rx buffers feature */
> >> +struct virtio_vsock_mrg_rxbuf_hdr {
> >> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> >> +} __attribute__((packed));
> >> +
> >>  enum virtio_vsock_type {
> >>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
> >>  };
> >> -- 
> >> 1.8.3.1
> >>
> > 
> > .
> > 
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:11   ` jiangyiwen
  2018-12-13  5:59     ` David Miller
  2018-12-13  5:59     ` David Miller
@ 2018-12-13 14:50     ` Michael S. Tsirkin
  2018-12-14  7:47       ` jiangyiwen
  2018-12-14  7:47       ` jiangyiwen
  2018-12-13 14:50     ` Michael S. Tsirkin
  3 siblings, 2 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2018-12-13 14:50 UTC (permalink / raw)
  To: jiangyiwen; +Cc: David Miller, stefanha, jasowang, netdev, kvm, virtualization

On Thu, Dec 13, 2018 at 11:11:48AM +0800, jiangyiwen wrote:
> On 2018/12/13 3:09, David Miller wrote:
> > From: jiangyiwen <jiangyiwen@huawei.com>
> > Date: Wed, 12 Dec 2018 17:29:31 +0800
> > 
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 1d57ed3..2292f30 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
> >>  	__le32	fwd_cnt;
> >>  } __attribute__((packed));
> >>
> >> +/* It add mergeable rx buffers feature */
> >> +struct virtio_vsock_mrg_rxbuf_hdr {
> >> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> >> +} __attribute__((packed));
> >> +
> > 
> > I know the rest of this file uses 'packed' but this attribute should
> > only be used if absolutely necessary as it incurs a
> > non-trivial performance penalty for some architectures.
> > 
> > .
> > 
> 
> Hi David,
> 
> I hope Host can fill fewer bytes into rx virtqueue, so
> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
> alignment.
> 
> Thanks,
> Yiwen.

It doesn't work like this now though, does it?
Buffers are preallocated and they are always aligned.
So I do not see the point.

-- 
MST

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:11   ` jiangyiwen
                       ` (2 preceding siblings ...)
  2018-12-13 14:50     ` Michael S. Tsirkin
@ 2018-12-13 14:50     ` Michael S. Tsirkin
  3 siblings, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2018-12-13 14:50 UTC (permalink / raw)
  To: jiangyiwen; +Cc: kvm, netdev, virtualization, stefanha, David Miller

On Thu, Dec 13, 2018 at 11:11:48AM +0800, jiangyiwen wrote:
> On 2018/12/13 3:09, David Miller wrote:
> > From: jiangyiwen <jiangyiwen@huawei.com>
> > Date: Wed, 12 Dec 2018 17:29:31 +0800
> > 
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 1d57ed3..2292f30 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
> >>  	__le32	fwd_cnt;
> >>  } __attribute__((packed));
> >>
> >> +/* It add mergeable rx buffers feature */
> >> +struct virtio_vsock_mrg_rxbuf_hdr {
> >> +	__le16  num_buffers;    /* number of mergeable rx buffers */
> >> +} __attribute__((packed));
> >> +
> > 
> > I know the rest of this file uses 'packed' but this attribute should
> > only be used if absolutely necessary as it incurs a
> > non-trivial performance penalty for some architectures.
> > 
> > .
> > 
> 
> Hi David,
> 
> I hope Host can fill fewer bytes into rx virtqueue, so
> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
> alignment.
> 
> Thanks,
> Yiwen.

It doesn't work like this now though, does it?
Buffers are preallocated and they are always aligned.
So I do not see the point.

-- 
MST

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  7:42       ` jiangyiwen
  2018-12-13 15:46         ` Stefan Hajnoczi
@ 2018-12-13 15:46         ` Stefan Hajnoczi
  1 sibling, 0 replies; 27+ messages in thread
From: Stefan Hajnoczi @ 2018-12-13 15:46 UTC (permalink / raw)
  To: jiangyiwen; +Cc: David Miller, mst, jasowang, netdev, kvm, virtualization

[-- Attachment #1: Type: text/plain, Size: 1792 bytes --]

On Thu, Dec 13, 2018 at 03:42:33PM +0800, jiangyiwen wrote:
> On 2018/12/13 13:59, David Miller wrote:
> > From: jiangyiwen <jiangyiwen@huawei.com>
> > Date: Thu, 13 Dec 2018 11:11:48 +0800
> > 
> >> I hope Host can fill fewer bytes into rx virtqueue, so
> >> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
> >> alignment.
> > 
> > The question is whether this actually matters.
> > 
> > Do you know?
> > 
> > If the object this is embedded inside of is at least 2-byte aligned,
> > you are marking it packed for nothing.
> > 
> > There are only 100% downsides to using the packed attribute.
> > 
> > Simply define your datastructures properly, with fixed sized types,
> > and all padding defined explicitly.
> > 
> > .
> > 
> 
> Hi David,
> 
> Thanks a lot, I need to send number buffers from Host to Guest, so I think
> we need to keep the structure size the same between host and guest.
> But after your reminder, I realize my code may have a serious problem:
> in mergeable mode, I send the whole struct virtio_vsock_pkt
> from Host to Guest; however, this structure's size may differ
> under different compilers (Guest and Host are different). The Guest
> may then parse the wrong packet length.
> 
> David, I want to ask if there is such a problem?
> 
> In addition, why I send total virtio_vsock_pkt structure from Host to Guest?
> - In order to avoid to allocate virtio_vsock_pkt memory when receiving
>   packets, in case of insufficient memory, it may have some advantages, and
>   we may keep consistent with old version.

Yes, virtio_vsock_pkt is internal driver state and should not be part of
the host<->guest interface (also for security reasons it's not good to
expose internal state structs across the interface).

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  7:42       ` jiangyiwen
@ 2018-12-13 15:46         ` Stefan Hajnoczi
  2018-12-13 15:46         ` Stefan Hajnoczi
  1 sibling, 0 replies; 27+ messages in thread
From: Stefan Hajnoczi @ 2018-12-13 15:46 UTC (permalink / raw)
  To: jiangyiwen; +Cc: kvm, mst, netdev, virtualization, David Miller


[-- Attachment #1.1: Type: text/plain, Size: 1792 bytes --]

On Thu, Dec 13, 2018 at 03:42:33PM +0800, jiangyiwen wrote:
> On 2018/12/13 13:59, David Miller wrote:
> > From: jiangyiwen <jiangyiwen@huawei.com>
> > Date: Thu, 13 Dec 2018 11:11:48 +0800
> > 
> >> I hope Host can fill fewer bytes into rx virtqueue, so
> >> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
> >> alignment.
> > 
> > The question is whether this actually matters.
> > 
> > Do you know?
> > 
> > If the object this is embedded inside of is at least 2-byte aligned,
> > you are marking it packed for nothing.
> > 
> > There are only 100% downsides to using the packed attribute.
> > 
> > Simply define your datastructures properly, with fixed sized types,
> > and all padding defined explicitly.
> > 
> > .
> > 
> 
> Hi David,
> 
> Thanks a lot, I need to send number buffers from Host to Guest, so I think
> we need to keep the structure size the same between host and guest.
> But after your reminder, I realize my code may have a serious problem:
> in mergeable mode, I send the whole struct virtio_vsock_pkt
> from Host to Guest; however, this structure's size may differ
> under different compilers (Guest and Host are different). The Guest
> may then parse the wrong packet length.
> 
> David, I want to ask if there is such a problem?
> 
> In addition, why I send total virtio_vsock_pkt structure from Host to Guest?
> - In order to avoid to allocate virtio_vsock_pkt memory when receiving
>   packets, in case of insufficient memory, it may have some advantages, and
>   we may keep consistent with old version.

Yes, virtio_vsock_pkt is internal driver state and should not be part of
the host<->guest interface (also for security reasons it's not good to
expose internal state structs across the interface).

Stefan

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:08   ` jiangyiwen
  2018-12-13 14:48     ` Michael S. Tsirkin
  2018-12-13 14:48     ` Michael S. Tsirkin
@ 2018-12-13 15:49     ` Stefan Hajnoczi
  2018-12-14  7:49       ` jiangyiwen
  2018-12-14  7:49       ` jiangyiwen
  2018-12-13 15:49     ` Stefan Hajnoczi
  3 siblings, 2 replies; 27+ messages in thread
From: Stefan Hajnoczi @ 2018-12-13 15:49 UTC (permalink / raw)
  To: jiangyiwen; +Cc: Michael S. Tsirkin, Jason Wang, netdev, kvm, virtualization

[-- Attachment #1: Type: text/plain, Size: 1764 bytes --]

On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
> > On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> >> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> >> it will merge big packet into rx vq.
> >>
> >> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
> > 
> > I feel this approach jumps into making interface changes for
> > optimizations too quickly. For example, what prevents us
> > from taking a big buffer, prepending each chunk
> > with the header and writing it out without
> > host/guest interface changes?
> > 
> > This should allow optimizations such as vhost_add_used_n
> > batching.
> > 
> > I realize a header in each packet does have a cost,
> > but it also has advantages such as improved robustness,
> > I'd like to see more of an apples to apples comparison
> > of the performance gain from skipping them.
> > 
> > 
> 
> Hi Michael,
> 
> I don't fully understand what you mean. Do you want to
> see a performance comparison between the current code and
> a version that only uses batching?
> 
> In my opinion, the guest doesn't fill big buffers in the rx vq in
> order to balance performance against guest memory pressure; adding the
> mergeable feature can improve big-packet performance. As for small
> packets, I am trying to find out the reason: it may be
> fluctuation in the test results, or that in mergeable mode, when the
> Host sends a 4k packet to the Guest, we must call vhost_get_vq_desc()
> twice in the host (hdr + 4k data), and in the guest we must also call
> virtqueue_get_buf() twice.

I like the idea of making optimizations in small steps and measuring the
effect of each step.  This way we'll know which aspect caused the
differences in benchmark results.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13  3:08   ` jiangyiwen
                       ` (2 preceding siblings ...)
  2018-12-13 15:49     ` Stefan Hajnoczi
@ 2018-12-13 15:49     ` Stefan Hajnoczi
  3 siblings, 0 replies; 27+ messages in thread
From: Stefan Hajnoczi @ 2018-12-13 15:49 UTC (permalink / raw)
  To: jiangyiwen; +Cc: netdev, virtualization, kvm, Michael S. Tsirkin


[-- Attachment #1.1: Type: text/plain, Size: 1764 bytes --]

On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
> > On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> >> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> >> it will merge big packet into rx vq.
> >>
> >> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
> > 
> > I feel this approach jumps into making interface changes for
> > optimizations too quickly. For example, what prevents us
> > from taking a big buffer, prepending each chunk
> > with the header and writing it out without
> > host/guest interface changes?
> > 
> > This should allow optimizations such as vhost_add_used_n
> > batching.
> > 
> > I realize a header in each packet does have a cost,
> > but it also has advantages such as improved robustness,
> > I'd like to see more of an apples to apples comparison
> > of the performance gain from skipping them.
> > 
> > 
> 
> Hi Michael,
> 
> I don't fully understand what you mean. Do you want to
> see a performance comparison between the current code and
> a version that only uses batching?
> 
> In my opinion, the guest doesn't fill big buffers in the rx vq in
> order to balance performance against guest memory pressure; adding the
> mergeable feature can improve big-packet performance. As for small
> packets, I am trying to find out the reason: it may be
> fluctuation in the test results, or that in mergeable mode, when the
> Host sends a 4k packet to the Guest, we must call vhost_get_vq_desc()
> twice in the host (hdr + 4k data), and in the guest we must also call
> virtqueue_get_buf() twice.

I like the idea of making optimizations in small steps and measuring the
effect of each step.  This way we'll know which aspect caused the
differences in benchmark results.

Stefan

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13 14:48     ` Michael S. Tsirkin
  2018-12-14  7:41       ` jiangyiwen
@ 2018-12-14  7:41       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-14  7:41 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stefan Hajnoczi, Jason Wang, netdev, kvm, virtualization

On 2018/12/13 22:48, Michael S. Tsirkin wrote:
> On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
>> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
>>> On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
>>>> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
>>>> it will merge big packet into rx vq.
>>>>
>>>> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
>>>
>>> I feel this approach jumps into making interface changes for
>>> optimizations too quickly. For example, what prevents us
>>> from taking a big buffer, prepending each chunk
>>> with the header and writing it out without
>>> host/guest interface changes?
>>>
>>> This should allow optimizations such as vhost_add_used_n
>>> batching.
>>>
>>> I realize a header in each packet does have a cost,
>>> but it also has advantages such as improved robustness,
>>> I'd like to see more of an apples to apples comparison
>>> of the performance gain from skipping them.
>>>
>>>
>>
>> Hi Michael,
>>
>> I don't fully understand what you mean, do you want to
>> see a performance comparison that before performance and
>> only use batching?
>>
>> In my opinion, guest don't fill big buffer in rx vq because
>> the balance performance and guest memory pressure, add
>> mergeable feature can improve big packets performance,
>> as for small packets, I try to find out the reason, may be
>> the fluctuation of test results, or in mergeable mode, when
>> Host send a 4k packet to Guest, we should call vhost_get_vq_desc()
>> twice in host(hdr + 4k data), and in guest we also should call
>> virtqueue_get_buf() twice.
>>
>> Thanks,
>> Yiwen.
> 
> What I mean is that at least some of the gain here is because
> larger skbs are passed up the stack.
> 

Yes, the main gain is from larger skbs.

> You do not need larger packets to build larger skbs though.
> Just check that the addresses match and you can combine
> multiple fragments in a single skb.
> 
> 

I understand what you mean: if we use batched sends, the
performance can also be improved. I can test the real
performance result using only batching.

Thanks,
Yiwen.

> 
>>>> ---
>>>>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
>>>>  include/linux/virtio_vsock.h      |   1 +
>>>>  include/uapi/linux/virtio_vsock.h |   5 ++
>>>>  3 files changed, 94 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>>> index 34bc3ab..dc52b0f 100644
>>>> --- a/drivers/vhost/vsock.c
>>>> +++ b/drivers/vhost/vsock.c
>>>> @@ -22,7 +22,8 @@
>>>>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
>>>>
>>>>  enum {
>>>> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
>>>> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
>>>> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
>>>>  };
>>>>
>>>>  /* Used to track all the vhost_vsock instances on the system. */
>>>> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  	return vsock;
>>>>  }
>>>>
>>>> +/* This segment of codes are copied from drivers/vhost/net.c */
>>>> +static int get_rx_bufs(struct vhost_virtqueue *vq,
>>>> +		struct vring_used_elem *heads, int datalen,
>>>> +		unsigned *iovcount, unsigned int quota)
>>>> +{
>>>> +	unsigned int out, in;
>>>> +	int seg = 0;
>>>> +	int headcount = 0;
>>>> +	unsigned d;
>>>> +	int ret;
>>>> +	/*
>>>> +	 * len is always initialized before use since we are always called with
>>>> +	 * datalen > 0.
>>>> +	 */
>>>> +	u32 uninitialized_var(len);
>>>> +
>>>> +	while (datalen > 0 && headcount < quota) {
>>>> +		if (unlikely(seg >= UIO_MAXIOV)) {
>>>> +			ret = -ENOBUFS;
>>>> +			goto err;
>>>> +		}
>>>> +
>>>> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
>>>> +				ARRAY_SIZE(vq->iov) - seg, &out,
>>>> +				&in, NULL, NULL);
>>>> +		if (unlikely(ret < 0))
>>>> +			goto err;
>>>> +
>>>> +		d = ret;
>>>> +		if (d == vq->num) {
>>>> +			ret = 0;
>>>> +			goto err;
>>>> +		}
>>>> +
>>>> +		if (unlikely(out || in <= 0)) {
>>>> +			vq_err(vq, "unexpected descriptor format for RX: "
>>>> +					"out %d, in %d\n", out, in);
>>>> +			ret = -EINVAL;
>>>> +			goto err;
>>>> +		}
>>>> +
>>>> +		heads[headcount].id = cpu_to_vhost32(vq, d);
>>>> +		len = iov_length(vq->iov + seg, in);
>>>> +		heads[headcount].len = cpu_to_vhost32(vq, len);
>>>> +		datalen -= len;
>>>> +		++headcount;
>>>> +		seg += in;
>>>> +	}
>>>> +
>>>> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
>>>> +	*iovcount = seg;
>>>> +
>>>> +	/* Detect overrun */
>>>> +	if (unlikely(datalen > 0)) {
>>>> +		ret = UIO_MAXIOV + 1;
>>>> +		goto err;
>>>> +	}
>>>> +	return headcount;
>>>> +err:
>>>> +	vhost_discard_vq_desc(vq, headcount);
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  static void
>>>>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>>>>  			    struct vhost_virtqueue *vq)
>>>> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
>>>>  	bool added = false;
>>>>  	bool restart_tx = false;
>>>> +	int mergeable;
>>>> +	size_t vsock_hlen;
>>>>
>>>>  	mutex_lock(&vq->mutex);
>>>>
>>>>  	if (!vq->private_data)
>>>>  		goto out;
>>>>
>>>> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
>>>> +	/*
>>>> +	 * Guest fill page for rx vq in mergeable case, so it will not
>>>> +	 * allocate pkt structure, we should reserve size of pkt in advance.
>>>> +	 */
>>>> +	if (likely(mergeable))
>>>> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
>>>> +	else
>>>> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
>>>> +
>>>>  	/* Avoid further vmexits, we're already processing the virtqueue */
>>>>  	vhost_disable_notify(&vsock->dev, vq);
>>>>
>>>>  	for (;;) {
>>>>  		struct virtio_vsock_pkt *pkt;
>>>>  		struct iov_iter iov_iter;
>>>> -		unsigned out, in;
>>>> +		unsigned out = 0, in = 0;
>>>>  		size_t nbytes;
>>>>  		size_t len;
>>>> -		int head;
>>>> +		s16 headcount;
>>>>
>>>>  		spin_lock_bh(&vsock->send_pkt_list_lock);
>>>>  		if (list_empty(&vsock->send_pkt_list)) {
>>>> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  		list_del_init(&pkt->list);
>>>>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
>>>>
>>>> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>>>> -					 &out, &in, NULL, NULL);
>>>> -		if (head < 0) {
>>>> -			spin_lock_bh(&vsock->send_pkt_list_lock);
>>>> -			list_add(&pkt->list, &vsock->send_pkt_list);
>>>> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
>>>> -			break;
>>>> -		}
>>>> -
>>>> -		if (head == vq->num) {
>>>> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
>>>> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
>>>> +		if (headcount <= 0) {
>>>>  			spin_lock_bh(&vsock->send_pkt_list_lock);
>>>>  			list_add(&pkt->list, &vsock->send_pkt_list);
>>>>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
>>>> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  			/* We cannot finish yet if more buffers snuck in while
>>>>  			 * re-enabling notify.
>>>>  			 */
>>>> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>>>> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>>>>  				vhost_disable_notify(&vsock->dev, vq);
>>>>  				continue;
>>>>  			}
>>>>  			break;
>>>>  		}
>>>>
>>>> -		if (out) {
>>>> -			virtio_transport_free_pkt(pkt);
>>>> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
>>>> -			break;
>>>> -		}
>>>> -
>>>>  		len = iov_length(&vq->iov[out], in);
>>>>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
>>>>
>>>> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
>>>> -		if (nbytes != sizeof(pkt->hdr)) {
>>>> +		if (likely(mergeable))
>>>> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
>>>> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
>>>> +		if (nbytes != vsock_hlen) {
>>>>  			virtio_transport_free_pkt(pkt);
>>>>  			vq_err(vq, "Faulted on copying pkt hdr\n");
>>>>  			break;
>>>> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  			break;
>>>>  		}
>>>>
>>>> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
>>>> +		vhost_add_used_n(vq, vq->heads, headcount);
>>>>  		added = true;
>>>>
>>>>  		if (pkt->reply) {
>>>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>>>> index bf84418..da9e1fe 100644
>>>> --- a/include/linux/virtio_vsock.h
>>>> +++ b/include/linux/virtio_vsock.h
>>>> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
>>>>
>>>>  struct virtio_vsock_pkt {
>>>>  	struct virtio_vsock_hdr	hdr;
>>>> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>>>>  	struct work_struct work;
>>>>  	struct list_head list;
>>>>  	/* socket refcnt not held, only use for cancellation */
>>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>>> index 1d57ed3..2292f30 100644
>>>> --- a/include/uapi/linux/virtio_vsock.h
>>>> +++ b/include/uapi/linux/virtio_vsock.h
>>>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>>>  	__le32	fwd_cnt;
>>>>  } __attribute__((packed));
>>>>
>>>> +/* It add mergeable rx buffers feature */
>>>> +struct virtio_vsock_mrg_rxbuf_hdr {
>>>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>>>> +} __attribute__((packed));
>>>> +
>>>>  enum virtio_vsock_type {
>>>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>>>>  };
>>>> -- 
>>>> 1.8.3.1
>>>>
>>>
>>> .
>>>
>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13 14:48     ` Michael S. Tsirkin
@ 2018-12-14  7:41       ` jiangyiwen
  2018-12-14  7:41       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-14  7:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, kvm, Stefan Hajnoczi, virtualization

On 2018/12/13 22:48, Michael S. Tsirkin wrote:
> On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
>> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
>>> On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
>>>> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
>>>> it will merge big packet into rx vq.
>>>>
>>>> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
>>>
>>> I feel this approach jumps into making interface changes for
>>> optimizations too quickly. For example, what prevents us
>>> from taking a big buffer, prepending each chunk
>>> with the header and writing it out without
>>> host/guest interface changes?
>>>
>>> This should allow optimizations such as vhost_add_used_n
>>> batching.
>>>
>>> I realize a header in each packet does have a cost,
>>> but it also has advantages such as improved robustness,
>>> I'd like to see more of an apples to apples comparison
>>> of the performance gain from skipping them.
>>>
>>>
>>
>> Hi Michael,
>>
>> I don't fully understand what you mean, do you want to
>> see a performance comparison that before performance and
>> only use batching?
>>
>> In my opinion, guest don't fill big buffer in rx vq because
>> the balance performance and guest memory pressure, add
>> mergeable feature can improve big packets performance,
>> as for small packets, I try to find out the reason, may be
>> the fluctuation of test results, or in mergeable mode, when
>> Host send a 4k packet to Guest, we should call vhost_get_vq_desc()
>> twice in host(hdr + 4k data), and in guest we also should call
>> virtqueue_get_buf() twice.
>>
>> Thanks,
>> Yiwen.
> 
> What I mean is that at least some of the gain here is because
> larger skbs are passed up the stack.
> 

Yes, the main gain is from larger skbs.

> You do not need larger packets to build larger skbs though.
> Just check that the addresses match and you can combine
> multiple fragments in a single skb.
> 
> 

I understand what you mean: if we use batched sends, the
performance can also be improved. I can test the real
performance result using only batching.

Thanks,
Yiwen.

> 
>>>> ---
>>>>  drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
>>>>  include/linux/virtio_vsock.h      |   1 +
>>>>  include/uapi/linux/virtio_vsock.h |   5 ++
>>>>  3 files changed, 94 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>>> index 34bc3ab..dc52b0f 100644
>>>> --- a/drivers/vhost/vsock.c
>>>> +++ b/drivers/vhost/vsock.c
>>>> @@ -22,7 +22,8 @@
>>>>  #define VHOST_VSOCK_DEFAULT_HOST_CID	2
>>>>
>>>>  enum {
>>>> -	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
>>>> +	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
>>>> +			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
>>>>  };
>>>>
>>>>  /* Used to track all the vhost_vsock instances on the system. */
>>>> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  	return vsock;
>>>>  }
>>>>
>>>> +/* This segment of codes are copied from drivers/vhost/net.c */
>>>> +static int get_rx_bufs(struct vhost_virtqueue *vq,
>>>> +		struct vring_used_elem *heads, int datalen,
>>>> +		unsigned *iovcount, unsigned int quota)
>>>> +{
>>>> +	unsigned int out, in;
>>>> +	int seg = 0;
>>>> +	int headcount = 0;
>>>> +	unsigned d;
>>>> +	int ret;
>>>> +	/*
>>>> +	 * len is always initialized before use since we are always called with
>>>> +	 * datalen > 0.
>>>> +	 */
>>>> +	u32 uninitialized_var(len);
>>>> +
>>>> +	while (datalen > 0 && headcount < quota) {
>>>> +		if (unlikely(seg >= UIO_MAXIOV)) {
>>>> +			ret = -ENOBUFS;
>>>> +			goto err;
>>>> +		}
>>>> +
>>>> +		ret = vhost_get_vq_desc(vq, vq->iov + seg,
>>>> +				ARRAY_SIZE(vq->iov) - seg, &out,
>>>> +				&in, NULL, NULL);
>>>> +		if (unlikely(ret < 0))
>>>> +			goto err;
>>>> +
>>>> +		d = ret;
>>>> +		if (d == vq->num) {
>>>> +			ret = 0;
>>>> +			goto err;
>>>> +		}
>>>> +
>>>> +		if (unlikely(out || in <= 0)) {
>>>> +			vq_err(vq, "unexpected descriptor format for RX: "
>>>> +					"out %d, in %d\n", out, in);
>>>> +			ret = -EINVAL;
>>>> +			goto err;
>>>> +		}
>>>> +
>>>> +		heads[headcount].id = cpu_to_vhost32(vq, d);
>>>> +		len = iov_length(vq->iov + seg, in);
>>>> +		heads[headcount].len = cpu_to_vhost32(vq, len);
>>>> +		datalen -= len;
>>>> +		++headcount;
>>>> +		seg += in;
>>>> +	}
>>>> +
>>>> +	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
>>>> +	*iovcount = seg;
>>>> +
>>>> +	/* Detect overrun */
>>>> +	if (unlikely(datalen > 0)) {
>>>> +		ret = UIO_MAXIOV + 1;
>>>> +		goto err;
>>>> +	}
>>>> +	return headcount;
>>>> +err:
>>>> +	vhost_discard_vq_desc(vq, headcount);
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  static void
>>>>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>>>>  			    struct vhost_virtqueue *vq)
>>>> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
>>>>  	bool added = false;
>>>>  	bool restart_tx = false;
>>>> +	int mergeable;
>>>> +	size_t vsock_hlen;
>>>>
>>>>  	mutex_lock(&vq->mutex);
>>>>
>>>>  	if (!vq->private_data)
>>>>  		goto out;
>>>>
>>>> +	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
>>>> +	/*
>>>> +	 * Guest fill page for rx vq in mergeable case, so it will not
>>>> +	 * allocate pkt structure, we should reserve size of pkt in advance.
>>>> +	 */
>>>> +	if (likely(mergeable))
>>>> +		vsock_hlen = sizeof(struct virtio_vsock_pkt);
>>>> +	else
>>>> +		vsock_hlen = sizeof(struct virtio_vsock_hdr);
>>>> +
>>>>  	/* Avoid further vmexits, we're already processing the virtqueue */
>>>>  	vhost_disable_notify(&vsock->dev, vq);
>>>>
>>>>  	for (;;) {
>>>>  		struct virtio_vsock_pkt *pkt;
>>>>  		struct iov_iter iov_iter;
>>>> -		unsigned out, in;
>>>> +		unsigned out = 0, in = 0;
>>>>  		size_t nbytes;
>>>>  		size_t len;
>>>> -		int head;
>>>> +		s16 headcount;
>>>>
>>>>  		spin_lock_bh(&vsock->send_pkt_list_lock);
>>>>  		if (list_empty(&vsock->send_pkt_list)) {
>>>> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  		list_del_init(&pkt->list);
>>>>  		spin_unlock_bh(&vsock->send_pkt_list_lock);
>>>>
>>>> -		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>>>> -					 &out, &in, NULL, NULL);
>>>> -		if (head < 0) {
>>>> -			spin_lock_bh(&vsock->send_pkt_list_lock);
>>>> -			list_add(&pkt->list, &vsock->send_pkt_list);
>>>> -			spin_unlock_bh(&vsock->send_pkt_list_lock);
>>>> -			break;
>>>> -		}
>>>> -
>>>> -		if (head == vq->num) {
>>>> +		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
>>>> +				&in, likely(mergeable) ? UIO_MAXIOV : 1);
>>>> +		if (headcount <= 0) {
>>>>  			spin_lock_bh(&vsock->send_pkt_list_lock);
>>>>  			list_add(&pkt->list, &vsock->send_pkt_list);
>>>>  			spin_unlock_bh(&vsock->send_pkt_list_lock);
>>>> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  			/* We cannot finish yet if more buffers snuck in while
>>>>  			 * re-enabling notify.
>>>>  			 */
>>>> -			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>>>> +			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>>>>  				vhost_disable_notify(&vsock->dev, vq);
>>>>  				continue;
>>>>  			}
>>>>  			break;
>>>>  		}
>>>>
>>>> -		if (out) {
>>>> -			virtio_transport_free_pkt(pkt);
>>>> -			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
>>>> -			break;
>>>> -		}
>>>> -
>>>>  		len = iov_length(&vq->iov[out], in);
>>>>  		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
>>>>
>>>> -		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
>>>> -		if (nbytes != sizeof(pkt->hdr)) {
>>>> +		if (likely(mergeable))
>>>> +			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
>>>> +		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
>>>> +		if (nbytes != vsock_hlen) {
>>>>  			virtio_transport_free_pkt(pkt);
>>>>  			vq_err(vq, "Faulted on copying pkt hdr\n");
>>>>  			break;
>>>> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>>>>  			break;
>>>>  		}
>>>>
>>>> -		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
>>>> +		vhost_add_used_n(vq, vq->heads, headcount);
>>>>  		added = true;
>>>>
>>>>  		if (pkt->reply) {
>>>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>>>> index bf84418..da9e1fe 100644
>>>> --- a/include/linux/virtio_vsock.h
>>>> +++ b/include/linux/virtio_vsock.h
>>>> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
>>>>
>>>>  struct virtio_vsock_pkt {
>>>>  	struct virtio_vsock_hdr	hdr;
>>>> +	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>>>>  	struct work_struct work;
>>>>  	struct list_head list;
>>>>  	/* socket refcnt not held, only use for cancellation */
>>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>>> index 1d57ed3..2292f30 100644
>>>> --- a/include/uapi/linux/virtio_vsock.h
>>>> +++ b/include/uapi/linux/virtio_vsock.h
>>>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>>>  	__le32	fwd_cnt;
>>>>  } __attribute__((packed));
>>>>
>>>> +/* It add mergeable rx buffers feature */
>>>> +struct virtio_vsock_mrg_rxbuf_hdr {
>>>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>>>> +} __attribute__((packed));
>>>> +
>>>>  enum virtio_vsock_type {
>>>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>>>>  };
>>>> -- 
>>>> 1.8.3.1
>>>>
>>>
>>> .
>>>
>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13 14:50     ` Michael S. Tsirkin
  2018-12-14  7:47       ` jiangyiwen
@ 2018-12-14  7:47       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-14  7:47 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: David Miller, stefanha, jasowang, netdev, kvm, virtualization

On 2018/12/13 22:50, Michael S. Tsirkin wrote:
> On Thu, Dec 13, 2018 at 11:11:48AM +0800, jiangyiwen wrote:
>> On 2018/12/13 3:09, David Miller wrote:
>>> From: jiangyiwen <jiangyiwen@huawei.com>
>>> Date: Wed, 12 Dec 2018 17:29:31 +0800
>>>
>>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>>> index 1d57ed3..2292f30 100644
>>>> --- a/include/uapi/linux/virtio_vsock.h
>>>> +++ b/include/uapi/linux/virtio_vsock.h
>>>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>>>  	__le32	fwd_cnt;
>>>>  } __attribute__((packed));
>>>>
>>>> +/* It add mergeable rx buffers feature */
>>>> +struct virtio_vsock_mrg_rxbuf_hdr {
>>>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>>>> +} __attribute__((packed));
>>>> +
>>>
>>> I know the rest of this file uses 'packed' but this attribute should
>>> only be used if absolutely necessary as it incurs a
>>> non-trivial performance penalty for some architectures.
>>>
>>> .
>>>
>>
>> Hi David,
>>
>> I hope Host can fill fewer bytes into rx virtqueue, so
>> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
>> alignment.
>>
>> Thanks,
>> Yiwen.
> 
> It doesn't work like this now though, does it?
> Buffers are preallocated and they are always aligned.
> So I do not see the point.
> 

Hi Michael,

Now my patch has a serious problem: I use virtio_vsock_pkt as
the transport header from host to guest, which will cause the
guest to parse the wrong packet length, because this structure's
size may differ under different compilers
(guest and host are different). I will solve the problem
in a later version.

Thanks,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13 14:50     ` Michael S. Tsirkin
@ 2018-12-14  7:47       ` jiangyiwen
  2018-12-14  7:47       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-14  7:47 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, netdev, virtualization, stefanha, David Miller

On 2018/12/13 22:50, Michael S. Tsirkin wrote:
> On Thu, Dec 13, 2018 at 11:11:48AM +0800, jiangyiwen wrote:
>> On 2018/12/13 3:09, David Miller wrote:
>>> From: jiangyiwen <jiangyiwen@huawei.com>
>>> Date: Wed, 12 Dec 2018 17:29:31 +0800
>>>
>>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>>> index 1d57ed3..2292f30 100644
>>>> --- a/include/uapi/linux/virtio_vsock.h
>>>> +++ b/include/uapi/linux/virtio_vsock.h
>>>> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>>>>  	__le32	fwd_cnt;
>>>>  } __attribute__((packed));
>>>>
>>>> +/* It add mergeable rx buffers feature */
>>>> +struct virtio_vsock_mrg_rxbuf_hdr {
>>>> +	__le16  num_buffers;    /* number of mergeable rx buffers */
>>>> +} __attribute__((packed));
>>>> +
>>>
>>> I know the rest of this file uses 'packed' but this attribute should
>>> only be used if absolutely necessary as it incurs a
>>> non-trivial performance penalty for some architectures.
>>>
>>> .
>>>
>>
>> Hi David,
>>
>> I hope Host can fill fewer bytes into rx virtqueue, so
>> I keep structure virtio_vsock_mrg_rxbuf_hdr one byte
>> alignment.
>>
>> Thanks,
>> Yiwen.
> 
> It doesn't work like this now though, does it?
> Buffers are preallocated and they are always aligned.
> So I do not see the point.
> 

Hi Michael,

My patch currently has a serious problem: I use virtio_vsock_pkt as
the transport header from host to guest, which will cause the
guest to parse the wrong packet length, because the size of this
structure may differ under different compilers (the guest and host
may be built with different ones). I will solve the problem
in a later version.

Thanks,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13 15:49     ` Stefan Hajnoczi
  2018-12-14  7:49       ` jiangyiwen
@ 2018-12-14  7:49       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-14  7:49 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Michael S. Tsirkin, Jason Wang, netdev, kvm, virtualization

On 2018/12/13 23:49, Stefan Hajnoczi wrote:
> On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
>> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
>>> On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
>>>> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
>>>> it will merge big packet into rx vq.
>>>>
>>>> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
>>>
>>> I feel this approach jumps into making interface changes for
>>> optimizations too quickly. For example, what prevents us
>>> from taking a big buffer, prepending each chunk
>>> with the header and writing it out without
>>> host/guest interface changes?
>>>
>>> This should allow optimizations such as vhost_add_used_n
>>> batching.
>>>
>>> I realize a header in each packet does have a cost,
>>> but it also has advantages such as improved robustness,
>>> I'd like to see more of an apples to apples comparison
>>> of the performance gain from skipping them.
>>>
>>>
>>
>> Hi Michael,
>>
>> I don't fully understand what you mean, do you want to
>> see a performance comparison that before performance and
>> only use batching?
>>
>> In my opinion, guest don't fill big buffer in rx vq because
>> the balance performance and guest memory pressure, add
>> mergeable feature can improve big packets performance,
>> as for small packets, I try to find out the reason, may be
>> the fluctuation of test results, or in mergeable mode, when
>> Host send a 4k packet to Guest, we should call vhost_get_vq_desc()
>> twice in host(hdr + 4k data), and in guest we also should call
>> virtqueue_get_buf() twice.
> 
> I like the idea of making optimizations in small steps and measuring the
> effect of each step.  This way we'll know which aspect caused the
> differences in benchmark results.
> 
> Stefan
> 

Yes, I am currently focused on another project, but I will use some
extra time to measure it.

Thanks,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
  2018-12-13 15:49     ` Stefan Hajnoczi
@ 2018-12-14  7:49       ` jiangyiwen
  2018-12-14  7:49       ` jiangyiwen
  1 sibling, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-14  7:49 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: netdev, virtualization, kvm, Michael S. Tsirkin

On 2018/12/13 23:49, Stefan Hajnoczi wrote:
> On Thu, Dec 13, 2018 at 11:08:04AM +0800, jiangyiwen wrote:
>> On 2018/12/12 23:37, Michael S. Tsirkin wrote:
>>> On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
>>>> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
>>>> it will merge big packet into rx vq.
>>>>
>>>> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
>>>
>>> I feel this approach jumps into making interface changes for
>>> optimizations too quickly. For example, what prevents us
>>> from taking a big buffer, prepending each chunk
>>> with the header and writing it out without
>>> host/guest interface changes?
>>>
>>> This should allow optimizations such as vhost_add_used_n
>>> batching.
>>>
>>> I realize a header in each packet does have a cost,
>>> but it also has advantages such as improved robustness,
>>> I'd like to see more of an apples to apples comparison
>>> of the performance gain from skipping them.
>>>
>>>
>>
>> Hi Michael,
>>
>> I don't fully understand what you mean, do you want to
>> see a performance comparison that before performance and
>> only use batching?
>>
>> In my opinion, guest don't fill big buffer in rx vq because
>> the balance performance and guest memory pressure, add
>> mergeable feature can improve big packets performance,
>> as for small packets, I try to find out the reason, may be
>> the fluctuation of test results, or in mergeable mode, when
>> Host send a 4k packet to Guest, we should call vhost_get_vq_desc()
>> twice in host(hdr + 4k data), and in guest we also should call
>> virtqueue_get_buf() twice.
> 
> I like the idea of making optimizations in small steps and measuring the
> effect of each step.  This way we'll know which aspect caused the
> differences in benchmark results.
> 
> Stefan
> 

Yes, I am currently focused on another project, but I will use some
extra time to measure it.

Thanks,
Yiwen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host
@ 2018-12-12  9:29 jiangyiwen
  0 siblings, 0 replies; 27+ messages in thread
From: jiangyiwen @ 2018-12-12  9:29 UTC (permalink / raw)
  To: Stefan Hajnoczi, Michael S. Tsirkin, Jason Wang
  Cc: netdev, kvm, virtualization

When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
it will merge big packet into rx vq.

Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
---
 drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
 include/linux/virtio_vsock.h      |   1 +
 include/uapi/linux/virtio_vsock.h |   5 ++
 3 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 34bc3ab..dc52b0f 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -22,7 +22,8 @@
 #define VHOST_VSOCK_DEFAULT_HOST_CID	2

 enum {
-	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
+			(1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
 };

 /* Used to track all the vhost_vsock instances on the system. */
@@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 	return vsock;
 }

+/* This segment of codes are copied from drivers/vhost/net.c */
+static int get_rx_bufs(struct vhost_virtqueue *vq,
+		struct vring_used_elem *heads, int datalen,
+		unsigned *iovcount, unsigned int quota)
+{
+	unsigned int out, in;
+	int seg = 0;
+	int headcount = 0;
+	unsigned d;
+	int ret;
+	/*
+	 * len is always initialized before use since we are always called with
+	 * datalen > 0.
+	 */
+	u32 uninitialized_var(len);
+
+	while (datalen > 0 && headcount < quota) {
+		if (unlikely(seg >= UIO_MAXIOV)) {
+			ret = -ENOBUFS;
+			goto err;
+		}
+
+		ret = vhost_get_vq_desc(vq, vq->iov + seg,
+				ARRAY_SIZE(vq->iov) - seg, &out,
+				&in, NULL, NULL);
+		if (unlikely(ret < 0))
+			goto err;
+
+		d = ret;
+		if (d == vq->num) {
+			ret = 0;
+			goto err;
+		}
+
+		if (unlikely(out || in <= 0)) {
+			vq_err(vq, "unexpected descriptor format for RX: "
+					"out %d, in %d\n", out, in);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		heads[headcount].id = cpu_to_vhost32(vq, d);
+		len = iov_length(vq->iov + seg, in);
+		heads[headcount].len = cpu_to_vhost32(vq, len);
+		datalen -= len;
+		++headcount;
+		seg += in;
+	}
+
+	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+	*iovcount = seg;
+
+	/* Detect overrun */
+	if (unlikely(datalen > 0)) {
+		ret = UIO_MAXIOV + 1;
+		goto err;
+	}
+	return headcount;
+err:
+	vhost_discard_vq_desc(vq, headcount);
+	return ret;
+}
+
 static void
 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 			    struct vhost_virtqueue *vq)
@@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
 	bool added = false;
 	bool restart_tx = false;
+	int mergeable;
+	size_t vsock_hlen;

 	mutex_lock(&vq->mutex);

 	if (!vq->private_data)
 		goto out;

+	mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
+	/*
+	 * Guest fill page for rx vq in mergeable case, so it will not
+	 * allocate pkt structure, we should reserve size of pkt in advance.
+	 */
+	if (likely(mergeable))
+		vsock_hlen = sizeof(struct virtio_vsock_pkt);
+	else
+		vsock_hlen = sizeof(struct virtio_vsock_hdr);
+
 	/* Avoid further vmexits, we're already processing the virtqueue */
 	vhost_disable_notify(&vsock->dev, vq);

 	for (;;) {
 		struct virtio_vsock_pkt *pkt;
 		struct iov_iter iov_iter;
-		unsigned out, in;
+		unsigned out = 0, in = 0;
 		size_t nbytes;
 		size_t len;
-		int head;
+		s16 headcount;

 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
@@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 		list_del_init(&pkt->list);
 		spin_unlock_bh(&vsock->send_pkt_list_lock);

-		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-					 &out, &in, NULL, NULL);
-		if (head < 0) {
-			spin_lock_bh(&vsock->send_pkt_list_lock);
-			list_add(&pkt->list, &vsock->send_pkt_list);
-			spin_unlock_bh(&vsock->send_pkt_list_lock);
-			break;
-		}
-
-		if (head == vq->num) {
+		headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
+				&in, likely(mergeable) ? UIO_MAXIOV : 1);
+		if (headcount <= 0) {
 			spin_lock_bh(&vsock->send_pkt_list_lock);
 			list_add(&pkt->list, &vsock->send_pkt_list);
 			spin_unlock_bh(&vsock->send_pkt_list_lock);
@@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 			/* We cannot finish yet if more buffers snuck in while
 			 * re-enabling notify.
 			 */
-			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+			if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) {
 				vhost_disable_notify(&vsock->dev, vq);
 				continue;
 			}
 			break;
 		}

-		if (out) {
-			virtio_transport_free_pkt(pkt);
-			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
-			break;
-		}
-
 		len = iov_length(&vq->iov[out], in);
 		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);

-		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
-		if (nbytes != sizeof(pkt->hdr)) {
+		if (likely(mergeable))
+			pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
+		nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
+		if (nbytes != vsock_hlen) {
 			virtio_transport_free_pkt(pkt);
 			vq_err(vq, "Faulted on copying pkt hdr\n");
 			break;
@@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 			break;
 		}

-		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+		vhost_add_used_n(vq, vq->heads, headcount);
 		added = true;

 		if (pkt->reply) {
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index bf84418..da9e1fe 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -50,6 +50,7 @@ struct virtio_vsock_sock {

 struct virtio_vsock_pkt {
 	struct virtio_vsock_hdr	hdr;
+	struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
 	struct work_struct work;
 	struct list_head list;
 	/* socket refcnt not held, only use for cancellation */
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 1d57ed3..2292f30 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
 	__le32	fwd_cnt;
 } __attribute__((packed));

+/* It add mergeable rx buffers feature */
+struct virtio_vsock_mrg_rxbuf_hdr {
+	__le16  num_buffers;    /* number of mergeable rx buffers */
+} __attribute__((packed));
+
 enum virtio_vsock_type {
 	VIRTIO_VSOCK_TYPE_STREAM = 1,
 };
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2018-12-14  7:50 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-12  9:29 [PATCH v2 2/5] VSOCK: support fill data to mergeable rx buffer in host jiangyiwen
2018-12-12 15:37 ` Michael S. Tsirkin
2018-12-12 15:37 ` Michael S. Tsirkin
2018-12-13  3:08   ` jiangyiwen
2018-12-13 14:48     ` Michael S. Tsirkin
2018-12-13 14:48     ` Michael S. Tsirkin
2018-12-14  7:41       ` jiangyiwen
2018-12-14  7:41       ` jiangyiwen
2018-12-13 15:49     ` Stefan Hajnoczi
2018-12-14  7:49       ` jiangyiwen
2018-12-14  7:49       ` jiangyiwen
2018-12-13 15:49     ` Stefan Hajnoczi
2018-12-12 19:09 ` David Miller
2018-12-12 19:09 ` David Miller
2018-12-13  3:11   ` jiangyiwen
2018-12-13  3:11   ` jiangyiwen
2018-12-13  5:59     ` David Miller
2018-12-13  5:59     ` David Miller
2018-12-13  7:42       ` jiangyiwen
2018-12-13  7:42       ` jiangyiwen
2018-12-13 15:46         ` Stefan Hajnoczi
2018-12-13 15:46         ` Stefan Hajnoczi
2018-12-13 14:50     ` Michael S. Tsirkin
2018-12-14  7:47       ` jiangyiwen
2018-12-14  7:47       ` jiangyiwen
2018-12-13 14:50     ` Michael S. Tsirkin
  -- strict thread matches above, loose matches on Subject: below --
2018-12-12  9:29 jiangyiwen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.