All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
	borisp@nvidia.com, john.fastabend@gmail.com, maximmi@nvidia.com,
	tariqt@nvidia.com, vfedorenko@novek.ru,
	Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net-next v2 01/11] tls: rx: allow only one reader at a time
Date: Thu, 14 Jul 2022 22:22:25 -0700	[thread overview]
Message-ID: <20220715052235.1452170-2-kuba@kernel.org> (raw)
In-Reply-To: <20220715052235.1452170-1-kuba@kernel.org>

recvmsg() in TLS gets data from the skb list (rx_list) or fresh
skbs we read from TCP via strparser. The former holds skbs which were
already decrypted for peek or decrypted and partially consumed.

tls_wait_data() only notices appearance of fresh skbs coming out
of TCP (or psock). It is possible, if there is a concurrent call
to peek() and recv() that the peek() will move the data from input
to rx_list without recv() noticing. recv() will then read data out
of order or never wake up.

This is not a practical use case/concern, but it makes the self
tests less reliable. This patch solves the problem by allowing
only one reader in.

Because having multiple processes calling read()/peek() is not
normal avoid adding a lock and try to fast-path the single reader
case.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h |  3 +++
 net/tls/tls_sw.c  | 61 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 8742e13bc362..e8935cfe0cd6 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -116,11 +116,14 @@ struct tls_sw_context_rx {
 	void (*saved_data_ready)(struct sock *sk);
 
 	struct sk_buff *recv_pkt;
+	u8 reader_present;
 	u8 async_capable:1;
 	u8 zc_capable:1;
+	u8 reader_contended:1;
 	atomic_t decrypt_pending;
 	/* protect crypto_wait with decrypt_pending*/
 	spinlock_t decrypt_compl_lock;
+	struct wait_queue_head wq;
 };
 
 struct tls_record_info {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 68d79ee48a56..761a63751616 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1753,6 +1753,51 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
 	sk_flush_backlog(sk);
 }
 
+static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+			       bool nonblock)
+{
+	long timeo;
+
+	lock_sock(sk);
+
+	timeo = sock_rcvtimeo(sk, nonblock);
+
+	while (unlikely(ctx->reader_present)) {
+		DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+		ctx->reader_contended = 1;
+
+		add_wait_queue(&ctx->wq, &wait);
+		sk_wait_event(sk, &timeo,
+			      !READ_ONCE(ctx->reader_present), &wait);
+		remove_wait_queue(&ctx->wq, &wait);
+
+		if (!timeo)
+			return -EAGAIN;
+		if (signal_pending(current))
+			return sock_intr_errno(timeo);
+	}
+
+	WRITE_ONCE(ctx->reader_present, 1);
+
+	return timeo;
+}
+
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+	if (unlikely(ctx->reader_contended)) {
+		if (wq_has_sleeper(&ctx->wq))
+			wake_up(&ctx->wq);
+		else
+			ctx->reader_contended = 0;
+
+		WARN_ON_ONCE(!ctx->reader_present);
+	}
+
+	WRITE_ONCE(ctx->reader_present, 0);
+	release_sock(sk);
+}
+
 int tls_sw_recvmsg(struct sock *sk,
 		   struct msghdr *msg,
 		   size_t len,
@@ -1782,7 +1827,9 @@ int tls_sw_recvmsg(struct sock *sk,
 		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
 
 	psock = sk_psock_get(sk);
-	lock_sock(sk);
+	timeo = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
+	if (timeo < 0)
+		return timeo;
 	bpf_strp_enabled = sk_psock_strp_enabled(psock);
 
 	/* If crypto failed the connection is broken */
@@ -1801,7 +1848,6 @@ int tls_sw_recvmsg(struct sock *sk,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 	len = len - copied;
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
 		ctx->zc_capable;
@@ -1956,7 +2002,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	copied += decrypted;
 
 end:
-	release_sock(sk);
+	tls_rx_reader_unlock(sk, ctx);
 	if (psock)
 		sk_psock_put(sk, psock);
 	return copied ? : err;
@@ -1978,9 +2024,9 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	long timeo;
 	int chunk;
 
-	lock_sock(sk);
-
-	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+	timeo = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK);
+	if (timeo < 0)
+		return timeo;
 
 	from_queue = !skb_queue_empty(&ctx->rx_list);
 	if (from_queue) {
@@ -2029,7 +2075,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 splice_read_end:
-	release_sock(sk);
+	tls_rx_reader_unlock(sk, ctx);
 	return copied ? : err;
 }
 
@@ -2371,6 +2417,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	} else {
 		crypto_init_wait(&sw_ctx_rx->async_wait);
 		spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+		init_waitqueue_head(&sw_ctx_rx->wq);
 		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
 		skb_queue_head_init(&sw_ctx_rx->rx_list);
-- 
2.36.1


  reply	other threads:[~2022-07-15  5:22 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-15  5:22 [PATCH net-next v2 00/11] tls: rx: avoid skb_cow_data() Jakub Kicinski
2022-07-15  5:22 ` Jakub Kicinski [this message]
2022-07-20  8:37   ` [PATCH net-next v2 01/11] tls: rx: allow only one reader at a time Eric Dumazet
2022-07-20 16:59     ` Jakub Kicinski
2022-07-20 17:09       ` Eric Dumazet
2022-07-20 17:19         ` Eric Dumazet
2022-07-15  5:22 ` [PATCH net-next v2 02/11] tls: rx: don't try to keep the skbs always on the list Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 03/11] tls: rx: don't keep decrypted skbs on ctx->recv_pkt Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 04/11] tls: rx: remove the message decrypted tracking Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 05/11] tls: rx: factor out device darg update Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 06/11] tls: rx: read the input skb from ctx->recv_pkt Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 07/11] tls: rx: return the decrypted skb via darg Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 08/11] tls: rx: async: adjust record geometry immediately Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 09/11] tls: rx: async: hold onto the input skb Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 10/11] tls: rx: async: don't put async zc on the list Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 11/11] tls: rx: decrypt into a fresh skb Jakub Kicinski
2022-07-18 10:40 ` [PATCH net-next v2 00/11] tls: rx: avoid skb_cow_data() patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220715052235.1452170-2-kuba@kernel.org \
    --to=kuba@kernel.org \
    --cc=borisp@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=maximmi@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=tariqt@nvidia.com \
    --cc=vfedorenko@novek.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.