All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
To: Stefan Hajnoczi <stefanha@redhat.com>,
	Stefano Garzarella <sgarzare@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Jason Wang <jasowang@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	"Jakub Kicinski" <kuba@kernel.org>,
	Paolo Abeni <pabeni@redhat.com>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	"virtualization@lists.linux-foundation.org" 
	<virtualization@lists.linux-foundation.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	kernel <kernel@sberdevices.ru>,
	Arseniy Krasnov <AVKrasnov@sberdevices.ru>,
	Krasnov Arseniy <oxffffaa@gmail.com>
Subject: [RFC PATCH v1 3/8] af_vsock: add zerocopy receive logic
Date: Thu, 12 May 2022 05:12:40 +0000	[thread overview]
Message-ID: <44d2404f-dc4f-f42c-1235-2ad7f537a030@sberdevices.ru> (raw)
In-Reply-To: <7cdcb1e1-7c97-c054-19cf-5caeacae981d@sberdevices.ru>

This patch:
1) Adds a callback for the 'mmap()' call on a socket. It
   checks the vm area flags and sets the vm area ops.
2) Adds a special 'getsockopt()' case which calls the
   transport's zerocopy callback. The input argument is
   the vm area address.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 include/net/af_vsock.h          |  4 +++
 include/uapi/linux/vm_sockets.h |  2 ++
 net/vmw_vsock/af_vsock.c        | 61 +++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8..d0aefb9ee4cf 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -135,6 +135,10 @@ struct vsock_transport {
 	bool (*stream_is_active)(struct vsock_sock *);
 	bool (*stream_allow)(u32 cid, u32 port);
 
+	int (*zerocopy_dequeue)(struct vsock_sock *vsk,
+				struct vm_area_struct *vma,
+				unsigned long addr);
+
 	/* SEQ_PACKET. */
 	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
 				     int flags);
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index c60ca33eac59..62aec51a2bc3 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -83,6 +83,8 @@
 
 #define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8
 
+#define SO_VM_SOCKETS_ZEROCOPY 9
+
 #if !defined(__KERNEL__)
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
 #define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e..3f98477ea546 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1652,6 +1652,42 @@ static int vsock_connectible_setsockopt(struct socket *sock,
 	return err;
 }
 
+static const struct vm_operations_struct afvsock_vm_ops = {
+};	/* no callbacks; the address only identifies afvsock_mmap() VMAs */
+
+/* Zerocopy receive: find the VMA containing @address, check that it was
+ * set up by afvsock_mmap() and let the transport fill it. Returns the
+ * transport's result or a negative errno.
+ */
+static int vsock_recv_zerocopy(struct socket *sock,
+			       unsigned long address)
+{
+	struct sock *sk = sock->sk;
+	struct vsock_sock *vsk = vsock_sk(sk);
+	const struct vsock_transport *transport;
+	struct vm_area_struct *vma;
+	int res;
+
+	lock_sock(sk);
+	/* Sample ->transport under the socket lock; it may not be set yet. */
+	transport = vsk->transport;
+	if (!transport || !transport->zerocopy_dequeue) {
+		res = -EOPNOTSUPP;
+		goto out;
+	}
+
+	mmap_write_lock(current->mm);
+	vma = vma_lookup(current->mm, address);
+	if (vma && vma->vm_ops == &afvsock_vm_ops)
+		res = transport->zerocopy_dequeue(vsk, vma, address);
+	else
+		res = -EINVAL;
+	mmap_write_unlock(current->mm);
+out:
+	release_sock(sk);
+	return res;
+}
+
 static int vsock_connectible_getsockopt(struct socket *sock,
 					int level, int optname,
 					char __user *optval,
@@ -1696,6 +1732,17 @@ static int vsock_connectible_getsockopt(struct socket *sock,
 		lv = sock_get_timeout(vsk->connect_timeout, &v,
 				      optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
 		break;
+	case SO_VM_SOCKETS_ZEROCOPY: {
+		unsigned long vma_addr;
+
+		if (len < sizeof(vma_addr))
+			return -EINVAL;
+
+		if (copy_from_user(&vma_addr, optval, sizeof(vma_addr)))
+			return -EFAULT;
+
+		return vsock_recv_zerocopy(sock, vma_addr);
+	}
 
 	default:
 		return -ENOPROTOOPT;
@@ -2124,6 +2171,19 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	return err;
 }
 
+static int afvsock_mmap(struct file *file, struct socket *sock,
+			struct vm_area_struct *vma)
+{
+	/* Only read-only, non-executable mappings are accepted. */
+	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+		return -EPERM;
+	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+	vma->vm_flags |= (VM_MIXEDMAP);
+	/* vsock_recv_zerocopy() recognises our VMAs by this ops pointer. */
+	vma->vm_ops = &afvsock_vm_ops;
+	return 0;
+}
+
 static const struct proto_ops vsock_stream_ops = {
 	.family = PF_VSOCK,
 	.owner = THIS_MODULE,
@@ -2143,6 +2203,6 @@ static const struct proto_ops vsock_stream_ops = {
 	.recvmsg = vsock_connectible_recvmsg,
-	.mmap = sock_no_mmap,
+	.mmap = afvsock_mmap,
 	.sendpage = sock_no_sendpage,
 };
 
 static const struct proto_ops vsock_seqpacket_ops = {
-- 
2.25.1

  parent reply	other threads:[~2022-05-12  5:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-12  5:04 [RFC PATCH v1 0/8] virtio/vsock: experimental zerocopy receive Arseniy Krasnov
2022-05-12  5:06 ` [RFC PATCH v1 1/8] virtio/vsock: rework packet allocation logic Arseniy Krasnov
2022-05-12  5:09 ` [RFC PATCH v1 2/8] vhost/vsock: " Arseniy Krasnov
2022-05-12  5:12 ` Arseniy Krasnov [this message]
2022-05-12  5:14 ` [RFC PATCH v1 4/8] virtio/vsock: add transport zerocopy callback Arseniy Krasnov
2022-05-12  5:17 ` [RFC PATCH v1 5/8] vhost/vsock: enable " Arseniy Krasnov
2022-05-12  5:18 ` [RFC PATCH v1 6/8] virtio/vsock: " Arseniy Krasnov
2022-05-12  5:20 ` [RFC PATCH v1 7/8] test/vsock: add receive zerocopy tests Arseniy Krasnov
2022-05-12  5:22 ` [RFC PATCH v1 8/8] test/vsock: vsock rx zerocopy utility Arseniy Krasnov
2022-05-17 15:14 ` [RFC PATCH v1 0/8] virtio/vsock: experimental zerocopy receive Stefano Garzarella
2022-05-17 15:14   ` Stefano Garzarella
2022-05-18 11:04   ` Arseniy Krasnov
2022-05-19  7:42     ` Stefano Garzarella
2022-05-19  7:42       ` Stefano Garzarella
2022-05-20 11:09       ` Arseniy Krasnov
2022-05-24  7:32         ` Stefano Garzarella
2022-05-24  7:32           ` Stefano Garzarella
2022-06-07 10:26           ` Arseniy Krasnov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44d2404f-dc4f-f42c-1235-2ad7f537a030@sberdevices.ru \
    --to=avkrasnov@sberdevices.ru \
    --cc=davem@davemloft.net \
    --cc=jasowang@redhat.com \
    --cc=kernel@sberdevices.ru \
    --cc=kuba@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=oxffffaa@gmail.com \
    --cc=pabeni@redhat.com \
    --cc=sgarzare@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.