All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tls: add zerocopy device sendpage
@ 2020-07-12 10:53 Boris Pismenny
  0 siblings, 0 replies; 12+ messages in thread
From: Boris Pismenny @ 2020-07-12 10:53 UTC (permalink / raw)
  To: kuba, john.fastabend, daniel, davem; +Cc: borisp, tariqt, netdev

Add support for zerocopy sendfile when using TLS device offload.
Before this patch, TLS device offload would copy sendfile data to a
bounce buffer. This can be avoided when the user knows that page cache
data is not modified. For example, when serving static files.
Removing this copy improves performance significantly, as TLS and TCP
sendfile perform the same operations, and the only overhead is TLS
header/trailer insertion.

This patch adds two configuration knobs to control TLS zerocopy sendfile:
1) socket option named TLS_TX_ZEROCOPY_SENDFILE that enables
applications to use zerocopy sendfile on a per-socket basis.
2) global sysctl named tls_zerocopy_sendfile that defines the default
for the entire system.

Sockets that do not use TLS device offload are not affected by this option,
and attempts to configure it on them will fail.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
---
 include/net/netns/ipv4.h   |  4 ++++
 include/net/tls.h          |  1 +
 include/uapi/linux/tls.h   |  1 +
 net/ipv4/sysctl_net_ipv4.c |  9 +++++++
 net/tls/tls_device.c       | 39 ++++++++++++++++++++++--------
 net/tls/tls_main.c         | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9e36738c1fe1..bc828d272151 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -196,6 +196,10 @@ struct netns_ipv4 {
 	int sysctl_igmp_llm_reports;
 	int sysctl_igmp_qrv;
 
+#ifdef CONFIG_TLS_DEVICE
+	int sysctl_tls_zerocopy_sendfile;
+#endif
+
 	struct ping_group_range ping_group_range;
 
 	atomic_t dev_addr_genid;
diff --git a/include/net/tls.h b/include/net/tls.h
index e5dac7e74e79..f80985ac55de 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -172,6 +172,7 @@ struct tls_record_info {
 
 struct tls_offload_context_tx {
 	struct crypto_aead *aead_send;
+	bool zerocopy_sendpage;
 	spinlock_t lock;	/* protects records list */
 	struct list_head records_list;
 	struct tls_record_info *open_record;
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index bcd2869ed472..d6f65f5d206f 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -39,6 +39,7 @@
 /* TLS socket options */
 #define TLS_TX			1	/* Set transmit parameters */
 #define TLS_RX			2	/* Set receive parameters */
+#define TLS_TX_ZEROCOPY_SENDFILE	3	/* transmit zerocopy sendfile */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5653e3b011bf..0a0fc29225a2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1353,6 +1353,15 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ONE
 	},
+#ifdef CONFIG_TLS_DEVICE
+	{
+		.procname	= "tls_zerocopy_sendfile",
+		.data		= &init_net.ipv4.sysctl_tls_zerocopy_sendfile,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif
 	{ }
 };
 
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 18fa6067bb7f..092b20428c15 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -413,7 +413,8 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
 static int tls_push_data(struct sock *sk,
 			 struct iov_iter *msg_iter,
 			 size_t size, int flags,
-			 unsigned char record_type)
+			 unsigned char record_type,
+			 struct page *zc_page)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -482,11 +483,21 @@ static int tls_push_data(struct sock *sk,
 		copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
 		copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-		rc = tls_device_copy_data(page_address(pfrag->page) +
-					  pfrag->offset, copy, msg_iter);
-		if (rc)
-			goto handle_error;
-		tls_append_frag(record, pfrag, copy);
+		if (!zc_page) {
+			rc = tls_device_copy_data(page_address(pfrag->page) +
+						  pfrag->offset, copy, msg_iter);
+			if (rc)
+				goto handle_error;
+			tls_append_frag(record, pfrag, copy);
+		} else {
+			struct page_frag _pfrag;
+
+			copy = min_t(size_t, size, (max_open_record_len - record->len));
+			_pfrag.page = zc_page;
+			_pfrag.offset = 0;
+			_pfrag.size = copy;
+			tls_append_frag(record, &_pfrag, copy);
+		}
 
 		size -= copy;
 		if (!size) {
@@ -548,7 +559,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	}
 
 	rc = tls_push_data(sk, &msg->msg_iter, size,
-			   msg->msg_flags, record_type);
+			   msg->msg_flags, record_type, NULL);
 
 out:
 	release_sock(sk);
@@ -560,9 +571,10 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 			int offset, size_t size, int flags)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	struct iov_iter	msg_iter;
-	char *kaddr = kmap(page);
 	struct kvec iov;
+	char *kaddr;
 	int rc;
 
 	if (flags & MSG_SENDPAGE_NOTLAST)
@@ -576,11 +588,18 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 		goto out;
 	}
 
+	if (ctx->zerocopy_sendpage) {
+		rc = tls_push_data(sk, &msg_iter, size,
+				   flags, TLS_RECORD_TYPE_DATA, page);
+		goto out;
+	}
+
+	kaddr = kmap(page);
 	iov.iov_base = kaddr + offset;
 	iov.iov_len = size;
 	iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
 	rc = tls_push_data(sk, &msg_iter, size,
-			   flags, TLS_RECORD_TYPE_DATA);
+			   flags, TLS_RECORD_TYPE_DATA, NULL);
 	kunmap(page);
 
 out:
@@ -654,7 +673,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 	struct iov_iter	msg_iter;
 
 	iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
-	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL);
 }
 
 void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ec10041c6b7d..b95437c91339 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -422,6 +422,27 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
 	return rc;
 }
 
+static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval,
+				   int __user *optlen)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_tx *ctx;
+	int len;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	len = min_t(unsigned int, len, sizeof(int));
+	if (len < 0)
+		return -EINVAL;
+
+	if (!tls_ctx || tls_ctx->tx_conf != TLS_HW)
+		return -EBUSY;
+
+	ctx = tls_offload_ctx_tx(tls_ctx);
+	return ctx->zerocopy_sendpage;
+}
+
 static int do_tls_getsockopt(struct sock *sk, int optname,
 			     char __user *optval, int __user *optlen)
 {
@@ -431,6 +452,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
 	case TLS_TX:
 		rc = do_tls_getsockopt_tx(sk, optval, optlen);
 		break;
+	case TLS_TX_ZEROCOPY_SENDFILE:
+		rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
+		break;
 	default:
 		rc = -ENOPROTOOPT;
 		break;
@@ -450,6 +474,15 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
 	return do_tls_getsockopt(sk, optname, optval, optlen);
 }
 
+static void tls_set_tx_zerocopy_sendfile(struct tls_context *tls_ctx,
+					 int val)
+{
+	struct tls_offload_context_tx *ctx;
+
+	ctx = tls_offload_ctx_tx(tls_ctx);
+	ctx->zerocopy_sendpage = val;
+}
+
 static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 				  unsigned int optlen, int tx)
 {
@@ -533,8 +566,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 		rc = tls_set_device_offload(sk, ctx);
 		conf = TLS_HW;
 		if (!rc) {
+			int zc = sock_net(sk)->ipv4.sysctl_tls_zerocopy_sendfile;
+
 			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
 			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
+			tls_set_tx_zerocopy_sendfile(ctx, zc);
 		} else {
 			rc = tls_set_sw_offload(sk, ctx, 1);
 			if (rc)
@@ -579,6 +615,25 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 	return rc;
 }
 
+static int do_tls_setsockopt_tx_zc(struct sock *sk, char __user *optval,
+				   unsigned int optlen)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	int val;
+
+	if (!tls_ctx || tls_ctx->tx_conf != TLS_HW)
+		return -EINVAL;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	tls_set_tx_zerocopy_sendfile(tls_ctx, val);
+	return 0;
+}
+
 static int do_tls_setsockopt(struct sock *sk, int optname,
 			     char __user *optval, unsigned int optlen)
 {
@@ -592,6 +647,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
 					    optname == TLS_TX);
 		release_sock(sk);
 		break;
+	case TLS_TX_ZEROCOPY_SENDFILE:
+		lock_sock(sk);
+		rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
+		release_sock(sk);
+		break;
 	default:
 		rc = -ENOPROTOOPT;
 		break;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread
[parent not found: <1594550649-3097-1-git-send-email-borisp@mellanox.com>]

end of thread, other threads:[~2020-07-14 20:56 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-12 10:53 [PATCH] tls: add zerocopy device sendpage Boris Pismenny
     [not found] <1594550649-3097-1-git-send-email-borisp@mellanox.com>
     [not found] ` <20200712.153233.370000904740228888.davem@davemloft.net>
2020-07-13  7:49   ` Boris Pismenny
2020-07-13 19:05     ` David Miller
2020-07-13 22:15       ` Boris Pismenny
2020-07-13 22:59         ` Jakub Kicinski
2020-07-14  7:31           ` Boris Pismenny
2020-07-14 16:23             ` Jakub Kicinski
2020-07-14 20:38             ` David Miller
2020-07-14  1:02         ` David Miller
2020-07-14  7:27           ` Boris Pismenny
2020-07-14 20:42             ` David Miller
2020-07-14 20:56               ` Boris Pismenny

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.