bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jakub Kicinski <jakub.kicinski@netronome.com>
To: john.fastabend@gmail.com, alexei.starovoitov@gmail.com,
	daniel@iogearbox.net
Cc: edumazet@google.com, netdev@vger.kernel.org, bpf@vger.kernel.org,
	Jakub Kicinski <jakub.kicinski@netronome.com>
Subject: [PATCH bpf v4 05/14] net/tls: fix transition through disconnect with close
Date: Fri, 19 Jul 2019 10:29:18 -0700	[thread overview]
Message-ID: <20190719172927.18181-6-jakub.kicinski@netronome.com> (raw)
In-Reply-To: <20190719172927.18181-1-jakub.kicinski@netronome.com>

From: John Fastabend <john.fastabend@gmail.com>

It is possible (via shutdown()) for TCP socks to go through TCP_CLOSE
state via tcp_disconnect() without actually calling tcp_close which
would then call the tls close callback. Because of this a user could
disconnect a socket then put it in a LISTEN state which would break
our assumptions about sockets always being ESTABLISHED state.

More directly because close() can call unhash() and unhash is
implemented by sockmap if a sockmap socket has TLS enabled we can
incorrectly destroy the psock from unhash() and then call its close
handler again. But because the psock (sockmap socket representation)
is already destroyed we call close handler in sk->prot. However,
in some cases (TLS BASE/BASE case) this will still point at the
sockmap close handler resulting in a circular call and crash reported
by syzbot.

To fix both above issues implement the unhash() routine for TLS.

v4:
 - add note about tls offload still needing the fix;
 - move sk_proto to the cold cache line;
 - split TX context free into "release" and "free",
   otherwise the GC work itself is in already freed
   memory;
 - more TX before RX for consistency;
 - reuse tls_ctx_free();
 - schedule the GC work after we're done with context
   to avoid UAF;
 - don't set the unhash in all modes, all modes "inherit"
   TLS_BASE's callbacks anyway;
 - disable the unhash hook for TLS_HW.

Fixes: 3c4d7559159bf ("tls: kernel TLS support")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 Documentation/networking/tls-offload.rst |  6 +++
 include/net/tls.h                        |  5 ++-
 net/tls/tls_main.c                       | 55 ++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index 048e5ca44824..8a1eeb393316 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -513,3 +513,9 @@ Redirects leak clear text
 
 In the RX direction, if segment has already been decrypted by the device
 and it gets redirected or mirrored - clear text will be transmitted out.
+
+shutdown() doesn't clear TLS state
+----------------------------------
+
+shutdown() system call allows for a TLS socket to be reused as a different
+connection. Offload doesn't currently handle that.
diff --git a/include/net/tls.h b/include/net/tls.h
index 235508e35fd4..9e425ac2de45 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -271,6 +271,8 @@ struct tls_context {
 	unsigned long flags;
 
 	/* cache cold stuff */
+	struct proto *sk_proto;
+
 	void (*sk_destruct)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
@@ -288,6 +290,8 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
+
+	struct work_struct gc;
 };
 
 enum tls_offload_ctx_dir {
@@ -359,7 +363,6 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
-void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
 void tls_sw_release_resources_tx(struct sock *sk);
 void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d152a00a7a27..48f1c26459d0 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -261,6 +261,33 @@ void tls_ctx_free(struct tls_context *ctx)
 	kfree(ctx);
 }
 
+static void tls_ctx_free_deferred(struct work_struct *gc)
+{
+	struct tls_context *ctx = container_of(gc, struct tls_context, gc);
+
+	/* Ensure any remaining work items are completed. The sk will
+	 * already have lost its tls_ctx reference by the time we get
+	 * here so no xmit operation will actually be performed.
+	 */
+	if (ctx->tx_conf == TLS_SW) {
+		tls_sw_cancel_work_tx(ctx);
+		tls_sw_free_ctx_tx(ctx);
+	}
+
+	if (ctx->rx_conf == TLS_SW) {
+		tls_sw_strparser_done(ctx);
+		tls_sw_free_ctx_rx(ctx);
+	}
+
+	tls_ctx_free(ctx);
+}
+
+static void tls_ctx_free_wq(struct tls_context *ctx)
+{
+	INIT_WORK(&ctx->gc, tls_ctx_free_deferred);
+	schedule_work(&ctx->gc);
+}
+
 static void tls_sk_proto_cleanup(struct sock *sk,
 				 struct tls_context *ctx, long timeo)
 {
@@ -288,6 +315,26 @@ static void tls_sk_proto_cleanup(struct sock *sk,
 #endif
 }
 
+static void tls_sk_proto_unhash(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	struct tls_context *ctx;
+
+	if (unlikely(!icsk->icsk_ulp_data)) {
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+	}
+
+	ctx = tls_get_ctx(sk);
+	tls_sk_proto_cleanup(sk, ctx, timeo);
+	icsk->icsk_ulp_data = NULL;
+
+	if (ctx->sk_proto->unhash)
+		ctx->sk_proto->unhash(sk);
+	tls_ctx_free_wq(ctx);
+}
+
 static void tls_sk_proto_close(struct sock *sk, long timeout)
 {
 	void (*sk_proto_close)(struct sock *sk, long timeout);
@@ -305,6 +352,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE)
 		tls_sk_proto_cleanup(sk, ctx, timeo);
 
+	sk->sk_prot = ctx->sk_proto;
 	release_sock(sk);
 	if (ctx->tx_conf == TLS_SW)
 		tls_sw_free_ctx_tx(ctx);
@@ -608,6 +656,7 @@ static struct tls_context *create_ctx(struct sock *sk)
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
 	ctx->sk_proto_close = sk->sk_prot->close;
+	ctx->unhash = sk->sk_prot->unhash;
 	return ctx;
 }
 
@@ -731,6 +780,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_BASE][TLS_BASE].setsockopt	= tls_setsockopt;
 	prot[TLS_BASE][TLS_BASE].getsockopt	= tls_getsockopt;
 	prot[TLS_BASE][TLS_BASE].close		= tls_sk_proto_close;
+	prot[TLS_BASE][TLS_BASE].unhash		= tls_sk_proto_unhash;
 
 	prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
 	prot[TLS_SW][TLS_BASE].sendmsg		= tls_sw_sendmsg;
@@ -748,16 +798,20 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 
 #ifdef CONFIG_TLS_DEVICE
 	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+	prot[TLS_HW][TLS_BASE].unhash		= base->unhash;
 	prot[TLS_HW][TLS_BASE].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_BASE].sendpage		= tls_device_sendpage;
 
 	prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
+	prot[TLS_HW][TLS_SW].unhash		= base->unhash;
 	prot[TLS_HW][TLS_SW].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_SW].sendpage		= tls_device_sendpage;
 
 	prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
+	prot[TLS_BASE][TLS_HW].unhash		= base->unhash;
 
 	prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW];
+	prot[TLS_SW][TLS_HW].unhash		= base->unhash;
 
 	prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
@@ -794,6 +848,7 @@ static int tls_init(struct sock *sk)
 	tls_build_proto(sk);
 	ctx->tx_conf = TLS_BASE;
 	ctx->rx_conf = TLS_BASE;
+	ctx->sk_proto = sk->sk_prot;
 	update_sk_prot(sk, ctx);
 out:
 	return rc;
-- 
2.21.0


  parent reply	other threads:[~2019-07-19 17:31 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-19 17:29 [PATCH bpf v4 00/14] sockmap/tls fixes Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 01/14] net/tls: don't arm strparser immediately in tls_set_sw_offload() Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 02/14] net/tls: don't call tls_sk_proto_close for hw record offload Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 03/14] net/tls: remove close callback sock unlock/lock around TX work flush Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 04/14] net/tls: remove sock unlock/lock around strp_done() Jakub Kicinski
2019-07-19 17:29 ` Jakub Kicinski [this message]
2019-07-19 17:29 ` [PATCH bpf v4 06/14] bpf: sockmap, sock_map_delete needs to use xchg Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 07/14] bpf: sockmap, synchronize_rcu before free'ing map Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 08/14] bpf: sockmap, only create entry if ulp is not already enabled Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 09/14] bpf: sockmap/tls, close can race with map free Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 10/14] selftests/tls: add a test for ULP but no keys Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 11/14] selftests/tls: test error codes around TLS ULP installation Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 12/14] selftests/tls: add a bidirectional test Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 13/14] selftests/tls: close the socket with open record Jakub Kicinski
2019-07-19 17:29 ` [PATCH bpf v4 14/14] selftests/tls: add shutdown tests Jakub Kicinski
2019-07-19 17:37 ` [PATCH bpf v4 00/14] sockmap/tls fixes Jakub Kicinski
2019-07-22 14:22   ` Daniel Borkmann
2019-07-22 15:48     ` John Fastabend
2019-07-22 15:46   ` John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190719172927.18181-6-jakub.kicinski@netronome.com \
    --to=jakub.kicinski@netronome.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=edumazet@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).