From: Chuck Lever III <chuck.lever@oracle.com>
To: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <cel@kernel.org>,
Linux NFS Mailing List <linux-nfs@vger.kernel.org>,
"kernel-tls-handshake@lists.linux.dev"
<kernel-tls-handshake@lists.linux.dev>
Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code
Date: Tue, 21 Mar 2023 16:09:37 +0000 [thread overview]
Message-ID: <CB9BF445-99C3-4EEA-9B7A-9DF4CE5659F3@oracle.com> (raw)
In-Reply-To: <d9b1dae62fed0f61bb48a017719f0f0114fed3c2.camel@kernel.org>
> On Mar 21, 2023, at 10:56 AM, Jeff Layton <jlayton@kernel.org> wrote:
>
> On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote:
>>
>>> On Mar 21, 2023, at 7:43 AM, Jeff Layton <jlayton@kernel.org> wrote:
>>>
>>> On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
>>>> From: Chuck Lever <chuck.lever@oracle.com>
>>>>
>>>> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
>>>> NFS server. If the client requests RPC-with-TLS and the user space
>>>> handshake agent is running, the server will set up a TLS session.
>>>>
>>>> There are no policy settings yet. For example, the server cannot
>>>> yet require the use of RPC-with-TLS to access its data.
>>>>
>>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>>> ---
>>>> include/linux/sunrpc/svc_xprt.h | 5 ++
>>>> include/linux/sunrpc/svcsock.h | 2 +
>>>> include/trace/events/sunrpc.h | 16 ++++++-
>>>> net/sunrpc/svc_xprt.c | 5 ++
>>>> net/sunrpc/svcauth_unix.c | 11 ++++-
>>>> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
>>>> 6 files changed, 125 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
>>>> index 775368802762..867479204840 100644
>>>> --- a/include/linux/sunrpc/svc_xprt.h
>>>> +++ b/include/linux/sunrpc/svc_xprt.h
>>>> @@ -27,7 +27,7 @@ struct svc_xprt_ops {
>>>> void (*xpo_detach)(struct svc_xprt *);
>>>> void (*xpo_free)(struct svc_xprt *);
>>>> void (*xpo_kill_temp_xprt)(struct svc_xprt *);
>>>> - void (*xpo_start_tls)(struct svc_xprt *);
>>>> + void (*xpo_handshake)(struct svc_xprt *xprt);
>>>> };
>>>>
>>>> struct svc_xprt_class {
>>>> @@ -70,6 +70,9 @@ struct svc_xprt {
>>>> #define XPT_LOCAL 12 /* connection from loopback interface */
>>>> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
>>>> #define XPT_CONG_CTRL 14 /* has congestion control */
>>>> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
>>>> +#define XPT_TLS_SESSION 16 /* transport-layer security established */
>>>> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */
>>>>
>>>> struct svc_serv *xpt_server; /* service for transport */
>>>> atomic_t xpt_reserved; /* space on outq that is rsvd */
>>>> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
>>>> index bcc555c7ae9c..1175e1c38bac 100644
>>>> --- a/include/linux/sunrpc/svcsock.h
>>>> +++ b/include/linux/sunrpc/svcsock.h
>>>> @@ -38,6 +38,8 @@ struct svc_sock {
>>>> /* Number of queued send requests */
>>>> atomic_t sk_sendqlen;
>>>>
>>>> + struct completion sk_handshake_done;
>>>> +
>>>> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
>>>> };
>>>>
>>>> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
>>>> index cf286a0a17d0..2667a8db4811 100644
>>>> --- a/include/trace/events/sunrpc.h
>>>> +++ b/include/trace/events/sunrpc.h
>>>> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
>>>> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
>>>> { BIT(XPT_LOCAL), "LOCAL" }, \
>>>> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
>>>> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
>>>> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
>>>> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
>>>> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
>>>> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
>>>>
>>>> TRACE_EVENT(svc_xprt_create_err,
>>>> TP_PROTO(
>>>> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
>>>> DEFINE_SVC_XPRT_EVENT(detach);
>>>> DEFINE_SVC_XPRT_EVENT(free);
>>>>
>>>> +#define DEFINE_SVC_TLS_EVENT(name) \
>>>> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
>>>> + TP_PROTO(const struct svc_xprt *xprt), \
>>>> + TP_ARGS(xprt))
>>>> +
>>>> +DEFINE_SVC_TLS_EVENT(start);
>>>> +DEFINE_SVC_TLS_EVENT(upcall);
>>>> +DEFINE_SVC_TLS_EVENT(unavailable);
>>>> +DEFINE_SVC_TLS_EVENT(not_started);
>>>> +DEFINE_SVC_TLS_EVENT(timed_out);
>>>> +
>>>> TRACE_EVENT(svc_xprt_accept,
>>>> TP_PROTO(
>>>> const struct svc_xprt *xprt,
>>>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>>>> index ba629297da4e..b68c04dbf876 100644
>>>> --- a/net/sunrpc/svc_xprt.c
>>>> +++ b/net/sunrpc/svc_xprt.c
>>>> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
>>>>
>>>> if (xpt_flags & BIT(XPT_BUSY))
>>>> return false;
>>>> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
>>>> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
>>>> return true;
>>>> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
>>>> if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
>>>> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>>>> module_put(xprt->xpt_class->xcl_owner);
>>>> }
>>>> svc_xprt_received(xprt);
>>>> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
>>>> + xprt->xpt_ops->xpo_handshake(xprt);
>>>> + svc_xprt_received(xprt);
>>>> } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
>>>> /* XPT_DATA|XPT_DEFERRED case: */
>>>> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
>>>> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
>>>> index 983c5891cb56..374995201df4 100644
>>>> --- a/net/sunrpc/svcauth_unix.c
>>>> +++ b/net/sunrpc/svcauth_unix.c
>>>> @@ -17,8 +17,9 @@
>>>> #include <net/ipv6.h>
>>>> #include <linux/kernel.h>
>>>> #include <linux/user_namespace.h>
>>>> -#define RPCDBG_FACILITY RPCDBG_AUTH
>>>> +#include <trace/events/sunrpc.h>
>>>>
>>>> +#define RPCDBG_FACILITY RPCDBG_AUTH
>>>>
>>>> #include "netns.h"
>>>>
>>>> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>>>> {
>>>> struct xdr_stream *xdr = &rqstp->rq_arg_stream;
>>>> struct svc_cred *cred = &rqstp->rq_cred;
>>>> + struct svc_xprt *xprt = rqstp->rq_xprt;
>>>> u32 flavor, len;
>>>> void *body;
>>>> __be32 *p;
>>>> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>>>> if (cred->cr_group_info == NULL)
>>>> return SVC_CLOSE;
>>>>
>>>> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
>>>> + if (xprt->xpt_ops->xpo_handshake) {
>>>> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
>>>> if (!p)
>>>> return SVC_CLOSE;
>>>> + trace_svc_tls_start(xprt);
>>>> *p++ = rpc_auth_null;
>>>> *p++ = cpu_to_be32(8);
>>>> memcpy(p, "STARTTLS", 8);
>>>> +
>>>> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>>>> + svc_xprt_enqueue(xprt);
>>>> } else {
>>>> + trace_svc_tls_unavailable(xprt);
>>>> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
>>>> RPC_AUTH_NULL, NULL, 0) < 0)
>>>> return SVC_CLOSE;
>>>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>>>> index b6df73cb706a..16ba8d6ab20e 100644
>>>> --- a/net/sunrpc/svcsock.c
>>>> +++ b/net/sunrpc/svcsock.c
>>>> @@ -44,9 +44,11 @@
>>>> #include <net/tcp.h>
>>>> #include <net/tcp_states.h>
>>>> #include <net/tls.h>
>>>> +#include <net/handshake.h>
>>>> #include <linux/uaccess.h>
>>>> #include <linux/highmem.h>
>>>> #include <asm/ioctls.h>
>>>> +#include <linux/key.h>
>>>>
>>>> #include <linux/sunrpc/types.h>
>>>> #include <linux/sunrpc/clnt.h>
>>>> @@ -64,6 +66,7 @@
>>>>
>>>> #define RPCDBG_FACILITY RPCDBG_SVCXPRT
>>>>
>>>> +#define SVC_HANDSHAKE_TO (20U * HZ)
>>>>
>>>> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
>>>> int flags);
>>>> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
>>>> rmb();
>>>> svsk->sk_odata(sk);
>>>> trace_svcsock_data_ready(&svsk->sk_xprt, 0);
>>>> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
>>>> + return;
>>>> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
>>>> svc_xprt_enqueue(&svsk->sk_xprt);
>>>> }
>>>> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
>>>> sock_no_linger(svsk->sk_sock->sk);
>>>> }
>>>>
>>>> +/**
>>>> + * svc_tcp_handshake_done - Handshake completion handler
>>>> + * @data: address of xprt to wake
>>>> + * @status: status of handshake
>>>> + * @peerid: serial number of key containing the remote peer's identity
>>>> + *
>>>> + * If a security policy is specified as an export option, we don't
>>>> + * have a specific export here to check. So we set a "TLS session
>>>> + * is present" flag on the xprt and let an upper layer enforce local
>>>> + * security policy.
>>>> + */
>>>> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
>>>> +{
>>>> + struct svc_xprt *xprt = data;
>>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>>> +
>>>> + if (!status) {
>>>> + if (peerid != TLS_NO_PEERID)
>>>> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
>>>> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
>>>> + }
>>>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>>>> + complete_all(&svsk->sk_handshake_done);
>>>> +}
>>>> +
>>>> +/**
>>>> + * svc_tcp_handshake - Perform a transport-layer security handshake
>>>> + * @xprt: connected transport endpoint
>>>> + *
>>>> + */
>>>> +static void svc_tcp_handshake(struct svc_xprt *xprt)
>>>> +{
>>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>>> + struct tls_handshake_args args = {
>>>> + .ta_sock = svsk->sk_sock,
>>>> + .ta_done = svc_tcp_handshake_done,
>>>> + .ta_data = xprt,
>>>> + };
>>>> + int ret;
>>>> +
>>>> + trace_svc_tls_upcall(xprt);
>>>> +
>>>> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
>>>> + init_completion(&svsk->sk_handshake_done);
>>>> + smp_wmb();
>>>> +
>>>> + ret = tls_server_hello_x509(&args, GFP_KERNEL);
>>>> + if (ret) {
>>>> + trace_svc_tls_not_started(xprt);
>>>> + goto out_failed;
>>>> + }
>>>> +
>>>> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
>>>> + SVC_HANDSHAKE_TO);
>>>
>>> Just curious: is this 20s timeout mandated by the spec?
>>
>> The spec doesn't mandate a timeout. I simply wanted
>> to guarantee forward progress.
>>
>>
>>> It seems like a long time to block a kernel thread if so.
>>
>> It's about the same as the client side timeout, fwiw.
>>
>>
>>> Do we need to be concerned
>>> with DoS attacks here? Could a client initiate handshakes and then stop
>>> communicating, forcing the server to tie up threads with these 20s
>>> waits?
>>
>> I think a malicious client can do all kinds of similar things
>> already. Do you have a particular timeout value in mind, or
>> is there some other mechanism we can use to better bullet-
>> proof this aspect of the handshake? I'm open to suggestion.
>>
>
> I don't have any suggestions, just trying to speculate about ways this
> could break. The only alternative I could see would be to defer the
> connection somehow until the reply comes in so that the thread can do
> other stuff in the meantime.
The server has a deferral mechanism, funnily enough. ;-) But
we could also just use a system workqueue.
Note that the handshake upcall mechanism itself has a limit
on the concurrent number of pending handshakes it will allow.
That might be enough to prevent a DoS.
> That's something we can potentially add
> later if we decide it's necessary though.
I shortened the timeout and added a comment suggesting this
as future work.
>>>> + if (ret <= 0) {
>>>> + if (tls_handshake_cancel(svsk->sk_sock)) {
>>>> + trace_svc_tls_timed_out(xprt);
>>>> + goto out_close;
>>>> + }
>>>> + }
>>>> +
>>>> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
>>>> + trace_svc_tls_unavailable(xprt);
>>>> + goto out_close;
>>>> + }
>>>> +
>>>> + /*
>>>> + * Mark the transport ready in case the remote sent RPC
>>>> + * traffic before the kernel received the handshake
>>>> + * completion downcall.
>>>> + */
>>>> + set_bit(XPT_DATA, &xprt->xpt_flags);
>>>> + svc_xprt_enqueue(xprt);
>>>> + return;
>>>> +
>>>> +out_close:
>>>> + set_bit(XPT_CLOSE, &xprt->xpt_flags);
>>>> +out_failed:
>>>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>>>> + set_bit(XPT_DATA, &xprt->xpt_flags);
>>>> + svc_xprt_enqueue(xprt);
>>>> +}
>>>> +
>>>> /*
>>>> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
>>>> */
>>>> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
>>>> .xpo_has_wspace = svc_tcp_has_wspace,
>>>> .xpo_accept = svc_tcp_accept,
>>>> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
>>>> + .xpo_handshake = svc_tcp_handshake,
>>>> };
>>>>
>>>> static struct svc_xprt_class svc_tcp_class = {
>>>> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
>>>> {
>>>> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>>>
>>>> + tls_handshake_cancel(svsk->sk_sock);
>>>> +
>>>> if (svsk->sk_sock->file)
>>>> sockfd_put(svsk->sk_sock);
>>>> else
>>>>
>>>>
>>>
>>> --
>>> Jeff Layton <jlayton@kernel.org>
>>
>> --
>> Chuck Lever
>>
>>
>
> --
> Jeff Layton <jlayton@kernel.org>
--
Chuck Lever
next prev parent reply other threads:[~2023-03-21 16:09 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-20 14:24 [PATCH RFC 0/5] NFSD support for RPC-with-TLS Chuck Lever
2023-03-20 14:24 ` [PATCH RFC 1/5] SUNRPC: Revert 987c7b1d094d Chuck Lever
2023-03-20 14:24 ` [PATCH RFC 2/5] SUNRPC: Recognize control messages in server-side TCP socket code Chuck Lever
2023-03-20 14:24 ` [PATCH RFC 3/5] SUNRPC: Ensure server-side sockets have a sock->file Chuck Lever
2023-03-20 14:24 ` [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code Chuck Lever
2023-03-21 11:43 ` Jeff Layton
2023-03-21 14:03 ` Chuck Lever III
2023-03-21 14:56 ` Jeff Layton
2023-03-21 16:09 ` Chuck Lever III [this message]
2023-03-21 16:46 ` Jeff Layton
2023-03-20 14:24 ` [PATCH RFC 5/5] NFSD: Handle new xprtsec= export option Chuck Lever
2023-03-21 11:50 ` Jeff Layton
2023-03-21 14:05 ` Chuck Lever III
2023-03-21 15:10 ` Jeff Layton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CB9BF445-99C3-4EEA-9B7A-9DF4CE5659F3@oracle.com \
--to=chuck.lever@oracle.com \
--cc=cel@kernel.org \
--cc=jlayton@kernel.org \
--cc=kernel-tls-handshake@lists.linux.dev \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).