All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
To: Stefan Hajnoczi <stefanha@redhat.com>,
	Stefano Garzarella <sgarzare@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Jason Wang <jasowang@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Arseniy Krasnov <AVKrasnov@sberdevices.ru>,
	"Krasnov Arseniy" <oxffffaa@gmail.com>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	"virtualization@lists.linux-foundation.org" 
	<virtualization@lists.linux-foundation.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	kernel <kernel@sberdevices.ru>
Subject: [RFC PATCH v1 07/12] vsock/virtio: MGS_ZEROCOPY flag support
Date: Mon, 6 Feb 2023 07:00:35 +0000	[thread overview]
Message-ID: <716333a1-d6d1-3dde-d04a-365d4a361bfe@sberdevices.ru> (raw)
In-Reply-To: <0e7c6fc4-b4a6-a27b-36e9-359597bba2b5@sberdevices.ru>

This adds main logic of MSG_ZEROCOPY flag processing for packet
creation. When this flag is set and user's iov iterator fits for
zerocopy transmission, call 'get_user_pages()' and add returned
pages to the newly created skb.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 net/vmw_vsock/virtio_transport_common.c | 212 ++++++++++++++++++++++--
 1 file changed, 195 insertions(+), 17 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 05ce97b967ad..69e37f8a68a6 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -37,6 +37,169 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
 	return container_of(t, struct virtio_transport, transport);
 }
 
+static int virtio_transport_can_zcopy(struct iov_iter *iov_iter,
+				      size_t free_space)
+{
+	size_t pages;
+	int i;
+
+	if (!iter_is_iovec(iov_iter))
+		return -1;
+
+	if (iov_iter->iov_offset)
+		return -1;
+
+	/* We can't send whole iov. */
+	if (free_space < iov_iter->count)
+		return -1;
+
+	for (pages = 0, i = 0; i < iov_iter->nr_segs; i++) {
+		const struct iovec *iovec;
+		int pages_in_elem;
+
+		iovec = &iov_iter->iov[i];
+
+		/* Base must be page aligned. */
+		if (offset_in_page(iovec->iov_base))
+			return -1;
+
+		/* Only last element could have not page aligned size. */
+		if (i != (iov_iter->nr_segs - 1)) {
+			if (offset_in_page(iovec->iov_len))
+				return -1;
+
+			pages_in_elem = iovec->iov_len >> PAGE_SHIFT;
+		} else {
+			pages_in_elem = round_up(iovec->iov_len, PAGE_SIZE);
+			pages_in_elem >>= PAGE_SHIFT;
+		}
+
+		/* In case of user's pages - one page is one frag. */
+		if (pages + pages_in_elem > MAX_SKB_FRAGS)
+			return -1;
+
+		pages += pages_in_elem;
+	}
+
+	return 0;
+}
+
+static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
+					   struct sk_buff *skb,
+					   struct iov_iter *iter,
+					   bool zerocopy)
+{
+	struct ubuf_info_msgzc *uarg_zc;
+	struct ubuf_info *uarg;
+
+	uarg = msg_zerocopy_realloc(sk_vsock(vsk),
+				    iov_length(iter->iov, iter->nr_segs),
+				    NULL);
+
+	if (!uarg)
+		return -1;
+
+	uarg_zc = uarg_to_msgzc(uarg);
+	uarg_zc->zerocopy = zerocopy ? 1 : 0;
+
+	skb_zcopy_init(skb, uarg);
+
+	return 0;
+}
+
+static int virtio_transport_fill_nonlinear_skb(struct sk_buff *skb,
+					       struct vsock_sock *vsk,
+					       struct virtio_vsock_pkt_info *info)
+{
+	struct iov_iter *iter;
+	int frag_idx;
+	int seg_idx;
+
+	iter = &info->msg->msg_iter;
+	frag_idx = 0;
+	VIRTIO_VSOCK_SKB_CB(skb)->curr_frag = 0;
+	VIRTIO_VSOCK_SKB_CB(skb)->frag_off = 0;
+
+	/* At this moment:
+	 * 1) 'iov_offset' is zero.
+	 * 2) Every 'iov_base' and 'iov_len' are also page aligned
+	 *    (except length of the last element).
+	 * 3) Number of pages in this iov <= MAX_SKB_FRAGS.
+	 * 4) Length of the data fits in current credit space.
+	 */
+	for (seg_idx = 0; seg_idx < iter->nr_segs; seg_idx++) {
+		struct page *user_pages[MAX_SKB_FRAGS];
+		const struct iovec *iovec;
+		size_t last_frag_len;
+		size_t pages_in_seg;
+		int page_idx;
+
+		iovec = &iter->iov[seg_idx];
+		pages_in_seg = iovec->iov_len >> PAGE_SHIFT;
+
+		if (iovec->iov_len % PAGE_SIZE) {
+			last_frag_len = iovec->iov_len % PAGE_SIZE;
+			pages_in_seg++;
+		} else {
+			last_frag_len = PAGE_SIZE;
+		}
+
+		if (get_user_pages((unsigned long)iovec->iov_base,
+				   pages_in_seg, FOLL_GET, user_pages,
+				   NULL) != pages_in_seg)
+			return -1;
+
+		for (page_idx = 0; page_idx < pages_in_seg; page_idx++) {
+			int frag_len = PAGE_SIZE;
+
+			if (page_idx == (pages_in_seg - 1))
+				frag_len = last_frag_len;
+
+			skb_fill_page_desc(skb, frag_idx++,
+					   user_pages[page_idx], 0,
+					   frag_len);
+			skb_len_add(skb, frag_len);
+		}
+	}
+
+	return virtio_transport_init_zcopy_skb(vsk, skb, iter, true);
+}
+
+static int virtio_transport_copy_payload(struct sk_buff *skb,
+					 struct vsock_sock *vsk,
+					 struct virtio_vsock_pkt_info *info,
+					 size_t len)
+{
+	void *payload;
+	int err;
+
+	payload = skb_put(skb, len);
+	err = memcpy_from_msg(payload, info->msg, len);
+	if (err)
+		return -1;
+
+	if (msg_data_left(info->msg))
+		return 0;
+
+	if (info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+		struct virtio_vsock_hdr *hdr;
+
+		hdr = virtio_vsock_hdr(skb);
+
+		hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+		if (info->msg->msg_flags & MSG_EOR)
+			hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+	}
+
+	if (info->flags & MSG_ZEROCOPY)
+		return virtio_transport_init_zcopy_skb(vsk, skb,
+						       &info->msg->msg_iter,
+						       false);
+
+	return 0;
+}
+
 /* Returns a new packet on success, otherwise returns NULL.
  *
  * If NULL is returned, errp is set to a negative errno.
@@ -47,15 +210,31 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
 			   u32 src_cid,
 			   u32 src_port,
 			   u32 dst_cid,
-			   u32 dst_port)
+			   u32 dst_port,
+			   struct vsock_sock *vsk)
 {
-	const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
+	const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM;
 	struct virtio_vsock_hdr *hdr;
 	struct sk_buff *skb;
-	void *payload;
-	int err;
+	bool use_zcopy = false;
+
+	if (info->msg) {
+		/* If SOCK_ZEROCOPY is not enabled, ignore MSG_ZEROCOPY
+		 * flag later and continue in classic way(e.g. without
+		 * completion).
+		 */
+		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
+			info->flags &= ~MSG_ZEROCOPY;
+		} else {
+			if ((info->flags & MSG_ZEROCOPY) &&
+			    !virtio_transport_can_zcopy(&info->msg->msg_iter, len)) {
+				use_zcopy = true;
+			}
+		}
+	}
 
-	skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
+	/* For MSG_ZEROCOPY length will be added later. */
+	skb = virtio_vsock_alloc_skb(skb_len + (use_zcopy ? 0 : len), GFP_KERNEL);
 	if (!skb)
 		return NULL;
 
@@ -70,18 +249,15 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
 	hdr->len	= cpu_to_le32(len);
 
 	if (info->msg && len > 0) {
-		payload = skb_put(skb, len);
-		err = memcpy_from_msg(payload, info->msg, len);
-		if (err)
-			goto out;
+		int err;
 
-		if (msg_data_left(info->msg) == 0 &&
-		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
-			hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+		if (use_zcopy)
+			err = virtio_transport_fill_nonlinear_skb(skb, vsk, info);
+		else
+			err = virtio_transport_copy_payload(skb, vsk, info, len);
 
-			if (info->msg->msg_flags & MSG_EOR)
-				hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
-		}
+		if (err)
+			goto out;
 	}
 
 	if (info->reply)
@@ -266,7 +442,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 
 	skb = virtio_transport_alloc_skb(info, pkt_len,
 					 src_cid, src_port,
-					 dst_cid, dst_port);
+					 dst_cid, dst_port,
+					 vsk);
 	if (!skb) {
 		virtio_transport_put_credit(vvs, pkt_len);
 		return -ENOMEM;
@@ -842,6 +1019,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
 		.msg = msg,
 		.pkt_len = len,
 		.vsk = vsk,
+		.flags = msg->msg_flags,
 	};
 
 	return virtio_transport_send_pkt_info(vsk, &info);
@@ -894,7 +1072,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
 					   le64_to_cpu(hdr->dst_cid),
 					   le32_to_cpu(hdr->dst_port),
 					   le64_to_cpu(hdr->src_cid),
-					   le32_to_cpu(hdr->src_port));
+					   le32_to_cpu(hdr->src_port), NULL);
 	if (!reply)
 		return -ENOMEM;
 
-- 
2.25.1

  parent reply	other threads:[~2023-02-06  7:00 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-06  6:51 [RFC PATCH v1 00/12] vsock: MSG_ZEROCOPY flag support Arseniy Krasnov
2023-02-06  6:53 ` [RFC PATCH v1 01/12] vsock: check error queue to set EPOLLERR Arseniy Krasnov
2023-02-16 13:40   ` Stefano Garzarella
2023-02-16 13:40     ` Stefano Garzarella
2023-02-20  9:00     ` Krasnov Arseniy
2023-02-06  6:54 ` [RFC PATCH v1 02/12] vsock: read from socket's error queue Arseniy Krasnov
2023-02-16 13:55   ` Stefano Garzarella
2023-02-16 13:55     ` Stefano Garzarella
2023-02-06  6:55 ` [RFC PATCH v1 03/12] vsock: check for MSG_ZEROCOPY support Arseniy Krasnov
2023-02-16 14:02   ` Stefano Garzarella
2023-02-16 14:02     ` Stefano Garzarella
2023-02-06  6:57 ` [RFC PATCH v1 04/12] vhost/vsock: non-linear skb handling support Arseniy Krasnov
2023-02-16 14:09   ` Stefano Garzarella
2023-02-16 14:09     ` Stefano Garzarella
2023-02-20  9:01     ` Krasnov Arseniy
2023-02-06  6:58 ` [RFC PATCH v1 05/12] vsock/virtio: non-linear skb support Arseniy Krasnov
2023-02-16 14:18   ` Stefano Garzarella
2023-02-16 14:18     ` Stefano Garzarella
2023-02-20  9:02     ` Krasnov Arseniy
2023-02-06  6:59 ` [RFC PATCH v1 06/12] vsock/virtio: non-linear skb handling for TAP dev Arseniy Krasnov
2023-02-16 14:30   ` Stefano Garzarella
2023-02-16 14:30     ` Stefano Garzarella
2023-02-06  7:00 ` Arseniy Krasnov [this message]
2023-02-16 15:16   ` [RFC PATCH v1 07/12] vsock/virtio: MGS_ZEROCOPY flag support Stefano Garzarella
2023-02-16 15:16     ` Stefano Garzarella
2023-02-20  9:04     ` Krasnov Arseniy
2023-02-28 10:26       ` Stefano Garzarella
2023-02-28 10:26         ` Stefano Garzarella
2023-02-06  7:01 ` [RFC PATCH v1 08/12] vhost/vsock: support MSG_ZEROCOPY for transport Arseniy Krasnov
2023-02-06  7:02 ` [RFC PATCH v1 09/12] vsock/virtio: " Arseniy Krasnov
2023-02-06  7:03 ` [RFC PATCH v1 10/12] net/sock: enable setting SO_ZEROCOPY for PF_VSOCK Arseniy Krasnov
2023-02-06  7:05 ` [RFC PATCH v1 11/12] test/vsock: MSG_ZEROCOPY flag tests Arseniy Krasnov
2023-02-06  7:06 ` [RFC PATCH v1 12/12] test/vsock: MSG_ZEROCOPY support for vsock_perf Arseniy Krasnov
2023-02-16 15:29   ` Stefano Garzarella
2023-02-16 15:29     ` Stefano Garzarella
2023-02-20  9:05     ` Krasnov Arseniy
2023-02-28 10:32       ` Stefano Garzarella
2023-02-28 10:32         ` Stefano Garzarella
2023-02-16 13:33 ` [RFC PATCH v1 00/12] vsock: MSG_ZEROCOPY flag support Stefano Garzarella
2023-02-16 13:33   ` Stefano Garzarella
2023-02-20  8:59   ` Krasnov Arseniy
2023-02-28 10:23     ` Stefano Garzarella
2023-02-28 10:23       ` Stefano Garzarella

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=716333a1-d6d1-3dde-d04a-365d4a361bfe@sberdevices.ru \
    --to=avkrasnov@sberdevices.ru \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=jasowang@redhat.com \
    --cc=kernel@sberdevices.ru \
    --cc=kuba@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=oxffffaa@gmail.com \
    --cc=pabeni@redhat.com \
    --cc=sgarzare@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.