From: jiangyiwen <jiangyiwen@huawei.com>
To: Stefan Hajnoczi <stefanha@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Jason Wang <jasowang@redhat.com>
Cc: <netdev@vger.kernel.org>, <kvm@vger.kernel.org>,
	<virtualization@lists.linux-foundation.org>
Subject: [PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
Date: Wed, 12 Dec 2018 17:31:39 +0800
Message-ID: <5C10D57B.3070701@huawei.com>

When the guest receives with mergeable rx buffers enabled, it can
merge the scattered rx buffers into one large buffer and then copy
it to user space.
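
As an illustration only (not the patch code itself), a receiver can
drain such a scattered packet with a loop along these lines; the
names follow this series and error handling is elided:

	/* Sketch: copy up to 'bytes' from a multi-segment pkt to a msg. */
	static int pkt_copy_to_msg(struct virtio_vsock_pkt *pkt,
				   struct msghdr *msg, size_t bytes)
	{
		size_t seen = 0;	/* payload bytes before vec[i] */
		int i, err;

		for (i = 0; i < pkt->nr_vecs && bytes; i++) {
			size_t vec_off, n;

			/* Skip segments that end before pkt->off */
			if (pkt->off >= seen + pkt->vec[i].iov_len) {
				seen += pkt->vec[i].iov_len;
				continue;
			}

			vec_off = pkt->off - seen;
			n = min(pkt->vec[i].iov_len - vec_off, bytes);
			err = memcpy_to_msg(msg, pkt->vec[i].iov_base + vec_off, n);
			if (err)
				return err;

			bytes -= n;
			pkt->off += n;
			seen += pkt->vec[i].iov_len;
		}

		return 0;
	}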

In addition, use an iovec array to replace buf in struct
virtio_vsock_pkt, keeping tx and rx consistent. The only difference
is that tx still fills the vector from a single physically
contiguous buffer.
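
Tx keeps nr_vecs == 1 here; with the vector in place, a follow-up
could queue multi-segment tx payloads directly. A rough sketch,
assuming only the names this series introduces:

	struct scatterlist hdr, bufs[VIRTIO_VSOCK_MAX_VEC_NUM];
	struct scatterlist *sgs[VIRTIO_VSOCK_MAX_VEC_NUM + 1];
	int i, ret, out_sg = 0;

	sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
	sgs[out_sg++] = &hdr;

	/* One "out" sg entry per kvec segment of the payload */
	for (i = 0; i < pkt->nr_vecs; i++) {
		sg_init_one(&bufs[i], pkt->vec[i].iov_base,
			    pkt->vec[i].iov_len);
		sgs[out_sg++] = &bufs[i];
	}

	ret = virtqueue_add_sgs(vq, sgs, out_sg, 0, pkt, GFP_KERNEL);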

Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
---
 drivers/vhost/vsock.c                   |  35 +++++++++---
 include/linux/virtio_vsock.h            |   6 +-
 net/vmw_vsock/virtio_transport.c        | 105 ++++++++++++++++++++++++++++----
 net/vmw_vsock/virtio_transport_common.c |  59 ++++++++++++++----
 4 files changed, 170 insertions(+), 35 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index dc52b0f..c7ab0dd 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -179,6 +179,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		size_t nbytes;
 		size_t len;
 		s16 headcount;
+		size_t remain_len;
+		int i;

 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
@@ -221,11 +223,23 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 			break;
 		}

-		nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
-		if (nbytes != pkt->len) {
-			virtio_transport_free_pkt(pkt);
-			vq_err(vq, "Faulted on copying pkt buf\n");
-			break;
+		remain_len = pkt->len;
+		for (i = 0; i < pkt->nr_vecs; i++) {
+			size_t tmp_len;
+
+			tmp_len = min(remain_len, pkt->vec[i].iov_len);
+			nbytes = copy_to_iter(pkt->vec[i].iov_base, tmp_len, &iov_iter);
+			if (nbytes != tmp_len)
+				break;
+
+			remain_len -= tmp_len;
+		}
+
+		/* A short copy means copy_to_iter faulted; drop the packet */
+		if (remain_len) {
+			virtio_transport_free_pkt(pkt);
+			vq_err(vq, "Faulted on copying pkt buf\n");
+			break;
 		}

 		vhost_add_used_n(vq, vq->heads, headcount);
@@ -341,6 +351,7 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work)
 	struct iov_iter iov_iter;
 	size_t nbytes;
 	size_t len;
+	void *buf;

 	if (in != 0) {
 		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
@@ -375,13 +386,17 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work)
 		return NULL;
 	}

-	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
-	if (!pkt->buf) {
+	buf = kmalloc(pkt->len, GFP_KERNEL);
+	if (!buf) {
 		kfree(pkt);
 		return NULL;
 	}

-	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+	pkt->vec[0].iov_base = buf;
+	pkt->vec[0].iov_len = pkt->len;
+	pkt->nr_vecs = 1;
+
+	nbytes = copy_from_iter(buf, pkt->len, &iov_iter);
 	if (nbytes != pkt->len) {
 		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
 		       pkt->len, nbytes);
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index da9e1fe..734eeed 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -13,6 +13,8 @@
 #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE	(1024 * 4)
 #define VIRTIO_VSOCK_MAX_BUF_SIZE		0xFFFFFFFFUL
 #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE		(1024 * 64)
+/* virtio_vsock_pkt buffer plus max payload (MAX_PKT_BUF_SIZE) in pages */
+#define VIRTIO_VSOCK_MAX_VEC_NUM ((VIRTIO_VSOCK_MAX_PKT_BUF_SIZE / PAGE_SIZE) + 1)

 /* Virtio-vsock feature */
 #define VIRTIO_VSOCK_F_MRG_RXBUF 0 /* Host can merge receive buffers. */
@@ -55,10 +57,12 @@ struct virtio_vsock_pkt {
 	struct list_head list;
 	/* socket refcnt not held, only use for cancellation */
 	struct vsock_sock *vsk;
-	void *buf;
+	struct kvec vec[VIRTIO_VSOCK_MAX_VEC_NUM];
+	int nr_vecs;
 	u32 len;
 	u32 off;
 	bool reply;
+	bool mergeable;
 };

 struct virtio_vsock_pkt_info {
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index c4a465c..148b58a 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -155,8 +155,10 @@ static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,

 		sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
 		sgs[out_sg++] = &hdr;
-		if (pkt->buf) {
-			sg_init_one(&buf, pkt->buf, pkt->len);
+		if (pkt->len) {
+			/* Tx currently supports only a single contiguous segment */
+			BUG_ON(pkt->vec[0].iov_len != pkt->len);
+			sg_init_one(&buf, pkt->vec[0].iov_base, pkt->vec[0].iov_len);
 			sgs[out_sg++] = &buf;
 		}

@@ -304,23 +306,28 @@ static int fill_old_rx_buff(struct virtqueue *vq)
 	struct virtio_vsock_pkt *pkt;
 	struct scatterlist hdr, buf, *sgs[2];
 	int ret;
+	void *pkt_buf;

 	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
 	if (!pkt)
 		return -ENOMEM;

-	pkt->buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!pkt->buf) {
+	pkt_buf = kmalloc(buf_len, GFP_KERNEL);
+	if (!pkt_buf) {
 		virtio_transport_free_pkt(pkt);
 		return -ENOMEM;
 	}

+	pkt->vec[0].iov_base = pkt_buf;
+	pkt->vec[0].iov_len = buf_len;
+	pkt->nr_vecs = 1;
+
 	pkt->len = buf_len;

 	sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
 	sgs[0] = &hdr;

-	sg_init_one(&buf, pkt->buf, buf_len);
+	sg_init_one(&buf, pkt->vec[0].iov_base, buf_len);
 	sgs[1] = &buf;
 	ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
 	if (ret)
@@ -388,11 +395,78 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
 	return val < virtqueue_get_vring_size(vq);
 }

+static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq,
+		struct virtio_vsock *vsock, unsigned int *total_len)
+{
+	struct virtio_vsock_pkt *pkt;
+	u16 num_buf;
+	void *buf;
+	unsigned int len;
+	size_t vsock_hlen = sizeof(struct virtio_vsock_pkt);
+
+	buf = virtqueue_get_buf(vq, &len);
+	if (!buf)
+		return NULL;
+
+	*total_len = len;
+	vsock->rx_buf_nr--;
+
+	if (unlikely(len < vsock_hlen)) {
+		put_page(virt_to_head_page(buf));
+		return NULL;
+	}
+
+	pkt = buf;
+	num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers);
+	if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_VEC_NUM) {
+		put_page(virt_to_head_page(buf));
+		return NULL;
+	}
+
+	/* Zero the rest of pkt after the virtio and mergeable rx headers */
+	memset(&pkt->work, 0, vsock_hlen - sizeof(struct virtio_vsock_hdr) -
+			sizeof(struct virtio_vsock_mrg_rxbuf_hdr));
+
+	pkt->mergeable = true;
+	pkt->len = le32_to_cpu(pkt->hdr.len);
+	if (!pkt->len)
+		return pkt;
+
+	len -= vsock_hlen;
+	if (len) {
+		pkt->vec[pkt->nr_vecs].iov_base = buf + vsock_hlen;
+		pkt->vec[pkt->nr_vecs].iov_len = len;
+		/* Payload shares the page with pkt, so take an extra reference */
+		get_page(virt_to_head_page(buf));
+		pkt->nr_vecs++;
+	}
+
+	while (--num_buf) {
+		buf = virtqueue_get_buf(vq, &len);
+		if (!buf)
+			goto err;
+
+		*total_len += len;
+		vsock->rx_buf_nr--;
+
+		pkt->vec[pkt->nr_vecs].iov_base = buf;
+		pkt->vec[pkt->nr_vecs].iov_len = len;
+		pkt->nr_vecs++;
+	}
+
+	return pkt;
+err:
+	virtio_transport_free_pkt(pkt);
+	return NULL;
+}
+
 static void virtio_transport_rx_work(struct work_struct *work)
 {
 	struct virtio_vsock *vsock =
 		container_of(work, struct virtio_vsock, rx_work);
 	struct virtqueue *vq;
+	size_t vsock_hlen = vsock->mergeable ? sizeof(struct virtio_vsock_pkt) :
+			sizeof(struct virtio_vsock_hdr);

 	vq = vsock->vqs[VSOCK_VQ_RX];

@@ -412,21 +486,26 @@ static void virtio_transport_rx_work(struct work_struct *work)
 				goto out;
 			}

-			pkt = virtqueue_get_buf(vq, &len);
-			if (!pkt) {
-				break;
-			}
+			if (likely(vsock->mergeable)) {
+				pkt = receive_mergeable(vq, vsock, &len);
+				if (!pkt)
+					break;
+			} else {
+				pkt = virtqueue_get_buf(vq, &len);
+				if (!pkt)
+					break;

-			vsock->rx_buf_nr--;
+				vsock->rx_buf_nr--;
+			}

 			/* Drop short/long packets */
-			if (unlikely(len < sizeof(pkt->hdr) ||
-				     len > sizeof(pkt->hdr) + pkt->len)) {
+			if (unlikely(len < vsock_hlen ||
+				     len > vsock_hlen + pkt->len)) {
 				virtio_transport_free_pkt(pkt);
 				continue;
 			}

-			pkt->len = len - sizeof(pkt->hdr);
+			pkt->len = len - vsock_hlen;
 			virtio_transport_deliver_tap_pkt(pkt);
 			virtio_transport_recv_pkt(pkt);
 		}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 3ae3a33..123a8b6 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -44,6 +44,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void)
 {
 	struct virtio_vsock_pkt *pkt;
 	int err;
+	void *buf = NULL;

 	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
 	if (!pkt)
@@ -62,12 +63,16 @@ static const struct virtio_transport *virtio_transport_get_ops(void)
 	pkt->vsk		= info->vsk;

 	if (info->msg && len > 0) {
-		pkt->buf = kmalloc(len, GFP_KERNEL);
-		if (!pkt->buf)
+		buf = kmalloc(len, GFP_KERNEL);
+		if (!buf)
 			goto out_pkt;
-		err = memcpy_from_msg(pkt->buf, info->msg, len);
+		err = memcpy_from_msg(buf, info->msg, len);
 		if (err)
 			goto out;
+
+		pkt->vec[0].iov_base = buf;
+		pkt->vec[0].iov_len = len;
+		pkt->nr_vecs = 1;
 	}

 	trace_virtio_transport_alloc_pkt(src_cid, src_port,
@@ -80,7 +85,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void)
 	return pkt;

 out:
-	kfree(pkt->buf);
+	kfree(buf);
 out_pkt:
 	kfree(pkt);
 	return NULL;
@@ -92,6 +97,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 	struct virtio_vsock_pkt *pkt = opaque;
 	struct af_vsockmon_hdr *hdr;
 	struct sk_buff *skb;
+	int i;

 	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len,
 			GFP_ATOMIC);
@@ -134,7 +140,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 	skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));

 	if (pkt->len) {
-		skb_put_data(skb, pkt->buf, pkt->len);
+		for (i = 0; i < pkt->nr_vecs; i++)
+			skb_put_data(skb, pkt->vec[i].iov_base, pkt->vec[i].iov_len);
 	}

 	return skb;
@@ -260,6 +267,9 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,

 	spin_lock_bh(&vvs->rx_lock);
 	while (total < len && !list_empty(&vvs->rx_queue)) {
+		size_t copy_bytes, last_vec_total = 0, vec_off;
+		int i;
+
 		pkt = list_first_entry(&vvs->rx_queue,
 				       struct virtio_vsock_pkt, list);

@@ -272,14 +282,28 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
 		 */
 		spin_unlock_bh(&vvs->rx_lock);

-		err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
-		if (err)
-			goto out;
+		for (i = 0; i < pkt->nr_vecs; i++) {
+			if (pkt->off > last_vec_total + pkt->vec[i].iov_len) {
+				last_vec_total += pkt->vec[i].iov_len;
+				continue;
+			}
+
+			vec_off = pkt->off - last_vec_total;
+			copy_bytes = min(pkt->vec[i].iov_len - vec_off, bytes);
+			err = memcpy_to_msg(msg, pkt->vec[i].iov_base + vec_off,
+					copy_bytes);
+			if (err)
+				goto out;
+
+			bytes -= copy_bytes;
+			pkt->off += copy_bytes;
+			total += copy_bytes;
+			last_vec_total += pkt->vec[i].iov_len;
+			if (!bytes)
+				break;
+		}

 		spin_lock_bh(&vvs->rx_lock);
-
-		total += bytes;
-		pkt->off += bytes;
 		if (pkt->off == pkt->len) {
 			virtio_transport_dec_rx_pkt(vvs, pkt);
 			list_del(&pkt->list);
@@ -1050,8 +1074,17 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)

 void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
 {
-	kfree(pkt->buf);
-	kfree(pkt);
+	int i;
+
+	if (pkt->mergeable) {
+		for (i = 0; i < pkt->nr_vecs; i++)
+			put_page(virt_to_head_page(pkt->vec[i].iov_base));
+		put_page(virt_to_head_page((void *)pkt));
+	} else {
+		for (i = 0; i < pkt->nr_vecs; i++)
+			kfree(pkt->vec[i].iov_base);
+		kfree(pkt);
+	}
 }
 EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);

-- 
1.8.3.1
