From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7B3F9D2ED for ; Tue, 21 Mar 2023 16:46:48 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id AF0F9C433D2; Tue, 21 Mar 2023 16:46:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1679417208; bh=WDfHlVO2st+iOevzwE/nDW5RV1C6RKMK9eSTNsWcM3w=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=Z98y2uSdCYeELeumsz3RnoOIh6fZILLemSoYMP+4pKkGeHM6kfn1Y11/iv008B/F9 dT5qOmdYD3hbhcHQqHKfca3Jf16vC7MPPJnr+qTD77BTe6Q/LqOUFt2HjhuSpBAQ8b +2D9vSYTMSoUYZAjobtlTqwkEp5wO1TTdwXBDftMVVUGZS2dDV1x4Yk+YRMWU6sfZs FtePiEqSAMHFkJ59k1p2QwdZcg64SbXxF1F1nIyDQMjKJRfCdcVjWCiRSvn+1TR1o1 hTkuZdqmsuNDWNpaJXIOhePiGfGY5Tey/0DRZvbybwuRQwPJ2H1ETkL70KjjnN7xjJ WwEpA+L1GGZlA== Message-ID: <27bb222567051576261c950f039247ad5fa064ea.camel@kernel.org> Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code From: Jeff Layton To: Chuck Lever III Cc: Chuck Lever , Linux NFS Mailing List , "kernel-tls-handshake@lists.linux.dev" Date: Tue, 21 Mar 2023 12:46:46 -0400 In-Reply-To: References: <167932094748.3131.11264549266195745851.stgit@manet.1015granger.net> <167932228666.3131.1680559749292527734.stgit@manet.1015granger.net> <55c3480354ae273fceb67976bbce33b9e04e6cf3.camel@kernel.org> <81A90B73-3367-4D4E-9F60-A20CF6EE6BF9@oracle.com> Content-Type: text/plain; charset="ISO-8859-15" Content-Transfer-Encoding: quoted-printable User-Agent: Evolution 3.46.4 (3.46.4-1.fc37) Precedence: bulk X-Mailing-List: kernel-tls-handshake@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 On Tue, 2023-03-21 at 16:09 +0000, Chuck Lever III wrote: >=20 > > On Mar 21, 2023, at 10:56 AM, Jeff Layton wrote: > >=20 > > On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote: > > >=20 > > > > On Mar 21, 2023, at 7:43 AM, Jeff Layton wrote= : > > > >=20 > > > > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: > > > > > From: Chuck Lever > > > > >=20 > > > > > This patch adds opportunitistic RPC-with-TLS to the Linux in-kern= el > > > > > NFS server. If the client requests RPC-with-TLS and the user spac= e > > > > > handshake agent is running, the server will set up a TLS session. > > > > >=20 > > > > > There are no policy settings yet. For example, the server cannot > > > > > yet require the use of RPC-with-TLS to access its data. > > > > >=20 > > > > > Signed-off-by: Chuck Lever > > > > > --- > > > > > include/linux/sunrpc/svc_xprt.h | 5 ++ > > > > > include/linux/sunrpc/svcsock.h | 2 + > > > > > include/trace/events/sunrpc.h | 16 ++++++- > > > > > net/sunrpc/svc_xprt.c | 5 ++ > > > > > net/sunrpc/svcauth_unix.c | 11 ++++- > > > > > net/sunrpc/svcsock.c | 91 ++++++++++++++++++++++++++= +++++++++++++ > > > > > 6 files changed, 125 insertions(+), 5 deletions(-) > > > > >=20 > > > > > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunr= pc/svc_xprt.h > > > > > index 775368802762..867479204840 100644 > > > > > --- a/include/linux/sunrpc/svc_xprt.h > > > > > +++ b/include/linux/sunrpc/svc_xprt.h > > > > > @@ -27,7 +27,7 @@ struct svc_xprt_ops { > > > > > void (*xpo_detach)(struct svc_xprt *); > > > > > void (*xpo_free)(struct svc_xprt *); > > > > > void (*xpo_kill_temp_xprt)(struct svc_xprt *); > > > > > - void (*xpo_start_tls)(struct svc_xprt *); > > > > > + void (*xpo_handshake)(struct svc_xprt *xprt); > > > > > }; > > > > >=20 > > > > > struct svc_xprt_class { > > > > > @@ -70,6 +70,9 @@ struct svc_xprt { > > > > > #define XPT_LOCAL 12 /* connection from loopback interface */ > > > > > #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before clo= sing */ > > > > > #define XPT_CONG_CTRL 14 /* has congestion control */ > > > > > +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ > > > > > +#define XPT_TLS_SESSION 16 /* transport-layer security establis= hed */ > > > > > +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ > > > > >=20 > > > > > struct svc_serv *xpt_server; /* service for transport */ > > > > > atomic_t xpt_reserved; /* space on outq that is rsvd */ > > > > > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrp= c/svcsock.h > > > > > index bcc555c7ae9c..1175e1c38bac 100644 > > > > > --- a/include/linux/sunrpc/svcsock.h > > > > > +++ b/include/linux/sunrpc/svcsock.h > > > > > @@ -38,6 +38,8 @@ struct svc_sock { > > > > > /* Number of queued send requests */ > > > > > atomic_t sk_sendqlen; > > > > >=20 > > > > > + struct completion sk_handshake_done; > > > > > + > > > > > struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ > > > > > }; > > > > >=20 > > > > > diff --git a/include/trace/events/sunrpc.h b/include/trace/events= /sunrpc.h > > > > > index cf286a0a17d0..2667a8db4811 100644 > > > > > --- a/include/trace/events/sunrpc.h > > > > > +++ b/include/trace/events/sunrpc.h > > > > > @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, > > > > > { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ > > > > > { BIT(XPT_LOCAL), "LOCAL" }, \ > > > > > { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ > > > > > - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) > > > > > + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ > > > > > + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ > > > > > + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ > > > > > + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) > > > > >=20 > > > > > TRACE_EVENT(svc_xprt_create_err, > > > > > TP_PROTO( > > > > > @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); > > > > > DEFINE_SVC_XPRT_EVENT(detach); > > > > > DEFINE_SVC_XPRT_EVENT(free); > > > > >=20 > > > > > +#define DEFINE_SVC_TLS_EVENT(name) \ > > > > > + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ > > > > > + TP_PROTO(const struct svc_xprt *xprt), \ > > > > > + TP_ARGS(xprt)) > > > > > + > > > > > +DEFINE_SVC_TLS_EVENT(start); > > > > > +DEFINE_SVC_TLS_EVENT(upcall); > > > > > +DEFINE_SVC_TLS_EVENT(unavailable); > > > > > +DEFINE_SVC_TLS_EVENT(not_started); > > > > > +DEFINE_SVC_TLS_EVENT(timed_out); > > > > > + > > > > > TRACE_EVENT(svc_xprt_accept, > > > > > TP_PROTO( > > > > > const struct svc_xprt *xprt, > > > > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > > > > > index ba629297da4e..b68c04dbf876 100644 > > > > > --- a/net/sunrpc/svc_xprt.c > > > > > +++ b/net/sunrpc/svc_xprt.c > > > > > @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *x= prt) > > > > >=20 > > > > > if (xpt_flags & BIT(XPT_BUSY)) > > > > > return false; > > > > > - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) > > > > > + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDS= HAKE))) > > > > > return true; > > > > > if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { > > > > > if (xprt->xpt_ops->xpo_has_wspace(xprt) && > > > > > @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *r= qstp, struct svc_xprt *xprt) > > > > > module_put(xprt->xpt_class->xcl_owner); > > > > > } > > > > > svc_xprt_received(xprt); > > > > > + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { > > > > > + xprt->xpt_ops->xpo_handshake(xprt); > > > > > + svc_xprt_received(xprt); > > > > > } else if (svc_xprt_reserve_slot(rqstp, xprt)) { > > > > > /* XPT_DATA|XPT_DEFERRED case: */ > > > > > dprintk("svc: server %p, pool %u, transport %p, inuse=3D%d\n", > > > > > diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.= c > > > > > index 983c5891cb56..374995201df4 100644 > > > > > --- a/net/sunrpc/svcauth_unix.c > > > > > +++ b/net/sunrpc/svcauth_unix.c > > > > > @@ -17,8 +17,9 @@ > > > > > #include > > > > > #include > > > > > #include > > > > > -#define RPCDBG_FACILITY RPCDBG_AUTH > > > > > +#include > > > > >=20 > > > > > +#define RPCDBG_FACILITY RPCDBG_AUTH > > > > >=20 > > > > > #include "netns.h" > > > > >=20 > > > > > @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > > > > > { > > > > > struct xdr_stream *xdr =3D &rqstp->rq_arg_stream; > > > > > struct svc_cred *cred =3D &rqstp->rq_cred; > > > > > + struct svc_xprt *xprt =3D rqstp->rq_xprt; > > > > > u32 flavor, len; > > > > > void *body; > > > > > __be32 *p; > > > > > @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > > > > > if (cred->cr_group_info =3D=3D NULL) > > > > > return SVC_CLOSE; > > > > >=20 > > > > > - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { > > > > > + if (xprt->xpt_ops->xpo_handshake) { > > > > > p =3D xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8= ); > > > > > if (!p) > > > > > return SVC_CLOSE; > > > > > + trace_svc_tls_start(xprt); > > > > > *p++ =3D rpc_auth_null; > > > > > *p++ =3D cpu_to_be32(8); > > > > > memcpy(p, "STARTTLS", 8); > > > > > + > > > > > + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > > > + svc_xprt_enqueue(xprt); > > > > > } else { > > > > > + trace_svc_tls_unavailable(xprt); > > > > > if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, > > > > > RPC_AUTH_NULL, NULL, 0) < 0) > > > > > return SVC_CLOSE; > > > > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c > > > > > index b6df73cb706a..16ba8d6ab20e 100644 > > > > > --- a/net/sunrpc/svcsock.c > > > > > +++ b/net/sunrpc/svcsock.c > > > > > @@ -44,9 +44,11 @@ > > > > > #include > > > > > #include > > > > > #include > > > > > +#include > > > > > #include > > > > > #include > > > > > #include > > > > > +#include > > > > >=20 > > > > > #include > > > > > #include > > > > > @@ -64,6 +66,7 @@ > > > > >=20 > > > > > #define RPCDBG_FACILITY RPCDBG_SVCXPRT > > > > >=20 > > > > > +#define SVC_HANDSHAKE_TO (20U * HZ) > > > > >=20 > > > > > static struct svc_sock *svc_setup_socket(struct svc_serv *, struc= t socket *, > > > > > int flags); > > > > > @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) > > > > > rmb(); > > > > > svsk->sk_odata(sk); > > > > > trace_svcsock_data_ready(&svsk->sk_xprt, 0); > > > > > + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) > > > > > + return; > > > > > if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) > > > > > svc_xprt_enqueue(&svsk->sk_xprt); > > > > > } > > > > > @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct sv= c_xprt *xprt) > > > > > sock_no_linger(svsk->sk_sock->sk); > > > > > } > > > > >=20 > > > > > +/** > > > > > + * svc_tcp_handshake_done - Handshake completion handler > > > > > + * @data: address of xprt to wake > > > > > + * @status: status of handshake > > > > > + * @peerid: serial number of key containing the remote peer's id= entity > > > > > + * > > > > > + * If a security policy is specified as an export option, we don= 't > > > > > + * have a specific export here to check. So we set a "TLS sessio= n > > > > > + * is present" flag on the xprt and let an upper layer enforce l= ocal > > > > > + * security policy. > > > > > + */ > > > > > +static void svc_tcp_handshake_done(void *data, int status, key_s= erial_t peerid) > > > > > +{ > > > > > + struct svc_xprt *xprt =3D data; > > > > > + struct svc_sock *svsk =3D container_of(xprt, struct svc_sock, s= k_xprt); > > > > > + > > > > > + if (!status) { > > > > > + if (peerid !=3D TLS_NO_PEERID) > > > > > + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); > > > > > + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > > > > > + } > > > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > > > + complete_all(&svsk->sk_handshake_done); > > > > > +} > > > > > + > > > > > +/** > > > > > + * svc_tcp_handshake - Perform a transport-layer security handsh= ake > > > > > + * @xprt: connected transport endpoint > > > > > + * > > > > > + */ > > > > > +static void svc_tcp_handshake(struct svc_xprt *xprt) > > > > > +{ > > > > > + struct svc_sock *svsk =3D container_of(xprt, struct svc_sock, s= k_xprt); > > > > > + struct tls_handshake_args args =3D { > > > > > + .ta_sock =3D svsk->sk_sock, > > > > > + .ta_done =3D svc_tcp_handshake_done, > > > > > + .ta_data =3D xprt, > > > > > + }; > > > > > + int ret; > > > > > + > > > > > + trace_svc_tls_upcall(xprt); > > > > > + > > > > > + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > > > > > + init_completion(&svsk->sk_handshake_done); > > > > > + smp_wmb(); > > > > > + > > > > > + ret =3D tls_server_hello_x509(&args, GFP_KERNEL); > > > > > + if (ret) { > > > > > + trace_svc_tls_not_started(xprt); > > > > > + goto out_failed; > > > > > + } > > > > > + > > > > > + ret =3D wait_for_completion_interruptible_timeout(&svsk->sk_han= dshake_done, > > > > > + SVC_HANDSHAKE_TO); > > > >=20 > > > > Just curious: is this 20s timeout mandated by the spec? > > >=20 > > > The spec doesn't mandate a timeout. I simply wanted > > > to guarantee forward progress. > > >=20 > > >=20 > > > > It seems like a long time to block a kernel thread if so. > > >=20 > > > It's about the same as the client side timeout, fwiw. > > >=20 > > >=20 > > > > Do we need to be concerned > > > > with DoS attacks here? Could a client initiate handshakes and then = stop > > > > communicating, forcing the server to tie up threads with these 20s > > > > waits? > > >=20 > > > I think a malicious client can do all kinds of similar things > > > already. Do you have a particular timeout value in mind, or > > > is there some other mechanism we can use to better bullet- > > > proof this aspect of the handshake? I'm open to suggestion. > > >=20 > >=20 > > I don't have any suggestions, just trying to speculate about ways this > > could break. The only alternative I could see would be to defer the > > connection somehow until the reply comes in so that the thread can do > > other stuff in the meantime. >=20 > The server has a deferral mechanism, funnily enough. ;-) But > we could also just use a system workqueue. >=20 Yeah. I wasn't sure if it was suitable for use here, but using the deferral infrastructure might be the way to go. That code is quite difficult to follow though. It's one of sunrpc's dustier corners. I'm not sure I follow what you're thinking on workqueues. Do you mean "defer" the svc_rqst and then queue it to a workqueue when the reply comes in? That might be ok too. We did have those nfsd workqueue patches that we could base the design on. > Note that the handshake upcall mechanism itself has a limit > on the concurrent number of pending handshakes it will allow. > That might be enough to prevent a DoS. >=20 What happens when you hit the limit? Would it be possible for someone malicious to block legitimate TLS connections from getting through by stalling the handshake on a bunch of connections? It might be worthwhile to consider some sort of per-client-address limit as well. Nonetheless, I don't see this as a show stopper that would keep us from merging a working implementation. We can add this sort of mitigation later too. > > That's something we can potentially add > > later if we decide it's necessary though. >=20 > I shortened the timeout and added a comment suggesting this > as future work. >=20 Sounds good. >=20 > > > > > + if (ret <=3D 0) { > > > > > + if (tls_handshake_cancel(svsk->sk_sock)) { > > > > > + trace_svc_tls_timed_out(xprt); > > > > > + goto out_close; > > > > > + } > > > > > + } > > > > > + > > > > > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { > > > > > + trace_svc_tls_unavailable(xprt); > > > > > + goto out_close; > > > > > + } > > > > > + > > > > > + /* > > > > > + * Mark the transport ready in case the remote sent RPC > > > > > + * traffic before the kernel received the handshake > > > > > + * completion downcall. > > > > > + */ > > > > > + set_bit(XPT_DATA, &xprt->xpt_flags); > > > > > + svc_xprt_enqueue(xprt); > > > > > + return; > > > > > + > > > > > +out_close: > > > > > + set_bit(XPT_CLOSE, &xprt->xpt_flags); > > > > > +out_failed: > > > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > > > + set_bit(XPT_DATA, &xprt->xpt_flags); > > > > > + svc_xprt_enqueue(xprt); > > > > > +} > > > > > + > > > > > /* > > > > > * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo > > > > > */ > > > > > @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_op= s =3D { > > > > > .xpo_has_wspace =3D svc_tcp_has_wspace, > > > > > .xpo_accept =3D svc_tcp_accept, > > > > > .xpo_kill_temp_xprt =3D svc_tcp_kill_temp_xprt, > > > > > + .xpo_handshake =3D svc_tcp_handshake, > > > > > }; > > > > >=20 > > > > > static struct svc_xprt_class svc_tcp_class =3D { > > > > > @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *= xprt) > > > > > { > > > > > struct svc_sock *svsk =3D container_of(xprt, struct svc_sock, sk= _xprt); > > > > >=20 > > > > > + tls_handshake_cancel(svsk->sk_sock); > > > > > + > > > > > if (svsk->sk_sock->file) > > > > > sockfd_put(svsk->sk_sock); > > > > > else > > > > >=20 > > > > >=20 > > > >=20 > > > > --=20 > > > > Jeff Layton > > >=20 > > > -- > > > Chuck Lever > > >=20 > > >=20 > >=20 > > --=20 > > Jeff Layton >=20 > -- > Chuck Lever >=20 >=20 >=20 --=20 Jeff Layton