All of lore.kernel.org
 help / color / mirror / Atom feed
From: Trond Myklebust <trond.myklebust@primarydata.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH v2 01/14] SUNRPC: Set SO_REUSEPORT socket option for TCP connections
Date: Mon,  9 Feb 2015 12:18:31 -0500	[thread overview]
Message-ID: <1423502324-25981-2-git-send-email-trond.myklebust@primarydata.com> (raw)
In-Reply-To: <1423502324-25981-1-git-send-email-trond.myklebust@primarydata.com>

When using TCP, we need the ability to reuse port numbers after
a disconnection, so that the NFSv3 server knows that we're the same
client. Currently we use a hack to work around the TCP socket's
TIME_WAIT: we send an RST instead of closing, which doesn't
always work...
The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple
TCP connections to the same source address+port combination, and thus
to use ordinary TCP close() instead of the current hack.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/xprtsock.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 87ce7e8bb8dc..484c5040436a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1667,6 +1667,39 @@ static unsigned short xs_get_random_port(void)
 }
 
 /**
+ * xs_sock_set_reuseport - set the socket's SO_REUSEPORT option
+ * @sock: socket
+ *
+ * Must be called, before binding, on every socket that shares the port.
+ */
+static void xs_sock_set_reuseport(struct socket *sock)
+{
+	int opt = 1;	/* sock_setsockopt() needs optlen >= sizeof(int) */
+
+	kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
+			(char *)&opt, sizeof(opt));
+}
+
+static unsigned short xs_sock_getport(struct socket *sock)
+{
+	struct sockaddr_storage buf;	/* holds either address family */
+	int buflen;	/* passed by address to kernel_getsockname() */
+	unsigned short port = 0;	/* returned as-is if the lookup fails */
+
+	if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+		goto out;
+	switch (buf.ss_family) {	/* any other family leaves port at 0 */
+	case AF_INET6:
+		port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port);
+		break;
+	case AF_INET:
+		port = ntohs(((struct sockaddr_in *)&buf)->sin_port);
+	}
+out:
+	return port;	/* host byte order, or 0 when no port was read */
+}
+
+/**
  * xs_set_port - reset the port number in the remote endpoint address
  * @xprt: generic transport
  * @port: new port number
@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 	xs_update_peer_port(xprt);
 }
 
+static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
+{
+	if (transport->srcport == 0)	/* keep a source port already recorded */
+		transport->srcport = xs_sock_getport(sock);
+}
+
 static unsigned short xs_get_srcport(struct sock_xprt *transport)
 {
 	unsigned short port = transport->srcport;
@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work)
 }
 
 static struct socket *xs_create_sock(struct rpc_xprt *xprt,
-		struct sock_xprt *transport, int family, int type, int protocol)
+		struct sock_xprt *transport, int family, int type,
+		int protocol, bool reuseport)
 {
 	struct socket *sock;
 	int err;
@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
 	}
 	xs_reclassify_socket(family, sock);
 
+	if (reuseport)
+		xs_sock_set_reuseport(sock);
+
 	err = xs_bind(transport, sock);
 	if (err) {
 		sock_release(sock);
@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
 	/* Start by resetting any existing state */
 	xs_reset_transport(transport);
 	sock = xs_create_sock(xprt, transport,
-			xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+			xs_addr(xprt)->sa_family, SOCK_DGRAM,
+			IPPROTO_UDP, false);
 	if (IS_ERR(sock))
 		goto out;
 
@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		sk->sk_allocation = GFP_ATOMIC;
 
 		/* socket options */
-		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
 		sock_reset_flag(sk, SOCK_LINGER);
 		tcp_sk(sk)->linger2 = 0;
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
 	switch (ret) {
 	case 0:
+		xs_set_srcport(transport, sock);
 	case -EINPROGRESS:
 		/* SYN_SENT! */
 		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	if (!sock) {
 		clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
 		sock = xs_create_sock(xprt, transport,
-				xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
+				xs_addr(xprt)->sa_family, SOCK_STREAM,
+				IPPROTO_TCP, true);
 		if (IS_ERR(sock)) {
 			status = PTR_ERR(sock);
 			goto out;
-- 
2.1.0


  reply	other threads:[~2015-02-09 17:18 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-09 17:18 [PATCH v2 00/14] Fix TCP connection port number reuse in NFSv3 Trond Myklebust
2015-02-09 17:18 ` Trond Myklebust [this message]
2015-02-09 17:18   ` [PATCH v2 02/14] SUNRPC: Handle EADDRINUSE on connect Trond Myklebust
2015-02-09 17:18     ` [PATCH v2 03/14] SUNRPC: Do not clear the source port in xs_reset_transport Trond Myklebust
2015-02-09 17:18       ` [PATCH v2 04/14] SUNRPC: Ensure xs_reset_transport() resets the close connection flags Trond Myklebust
2015-02-09 17:18         ` [PATCH v2 05/14] SUNRPC: Add helpers to prevent socket create from racing Trond Myklebust
2015-02-09 17:18           ` [PATCH v2 06/14] SUNRPC: TCP/UDP always close the old socket before reconnecting Trond Myklebust
2015-02-09 17:18             ` [PATCH v2 07/14] SUNRPC: Remove TCP client connection reset hack Trond Myklebust
2015-02-09 17:18               ` [PATCH v2 08/14] SUNRPC: Remove TCP socket linger code Trond Myklebust
2015-02-09 17:18                 ` [PATCH v2 09/14] SUNRPC: Cleanup to remove remaining uses of XPRT_CONNECTION_ABORT Trond Myklebust
2015-02-09 17:18                   ` [PATCH v2 10/14] SUNRPC: Ensure xs_tcp_shutdown() requests a full close of the connection Trond Myklebust
2015-02-09 17:18                     ` [PATCH v2 11/14] SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release Trond Myklebust
2015-02-09 17:18                       ` [PATCH v2 12/14] SUNRPC: Remove the redundant XPRT_CONNECTION_CLOSE flag Trond Myklebust
2015-02-09 17:18                         ` [PATCH v2 13/14] SUNRPC: Handle connection reset more efficiently Trond Myklebust
2015-02-09 17:18                           ` [PATCH v2 14/14] SUNRPC: Define xs_tcp_fin_timeout only if CONFIG_SUNRPC_DEBUG Trond Myklebust

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1423502324-25981-2-git-send-email-trond.myklebust@primarydata.com \
    --to=trond.myklebust@primarydata.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.