All of lore.kernel.org
 help / color / mirror / Atom feed
From: Trond Myklebust <trond.myklebust@primarydata.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH v3 01/15] SUNRPC: Set SO_REUSEPORT socket option for TCP connections
Date: Mon,  9 Feb 2015 17:47:57 -0500	[thread overview]
Message-ID: <1423522091-35365-2-git-send-email-trond.myklebust@primarydata.com> (raw)
In-Reply-To: <1423522091-35365-1-git-send-email-trond.myklebust@primarydata.com>

When using TCP, we need the ability to reuse port numbers after
a disconnection, so that the NFSv3 server knows that we're the same
client. Currently we use a hack to work around the TCP socket's
TIME_WAIT: we send an RST instead of closing, which doesn't
always work...
The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple
TCP connections to the same source address+port combination, and thus
to use ordinary TCP close() instead of the current hack.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/xprtsock.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 87ce7e8bb8dc..484c5040436a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1667,6 +1667,39 @@ static unsigned short xs_get_random_port(void)
 }
 
 /**
+ * xs_sock_set_reuseport - set the socket's port reuse option
+ * @sock: socket
+ *
+ * Note that this function has to be called on all sockets that share the
+ * same port, and it must be called before binding.
+ */
+static void xs_sock_set_reuseport(struct socket *sock)
+{
+	int opt = 1;	/* SO_REUSEPORT takes an int-sized flag, not a char */
+
+	kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (char *)&opt, sizeof(opt));
+}
+
+static unsigned short xs_sock_getport(struct socket *sock)
+{
+	struct sockaddr_storage addr;
+	int addrlen;
+
+	/* Report port 0 if the address is unavailable or not IPv4/IPv6. */
+	if (kernel_getsockname(sock, (struct sockaddr *)&addr, &addrlen) < 0)
+		return 0;
+
+	switch (addr.ss_family) {
+	case AF_INET:
+		return ntohs(((struct sockaddr_in *)&addr)->sin_port);
+	case AF_INET6:
+		return ntohs(((struct sockaddr_in6 *)&addr)->sin6_port);
+	default:
+		return 0;
+	}
+}
+
+/**
  * xs_set_port - reset the port number in the remote endpoint address
  * @xprt: generic transport
  * @port: new port number
@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 	xs_update_peer_port(xprt);
 }
 
+static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
+{
+	if (!transport->srcport)	/* an already-recorded port is kept */
+		transport->srcport = xs_sock_getport(sock);
+}
+
 static unsigned short xs_get_srcport(struct sock_xprt *transport)
 {
 	unsigned short port = transport->srcport;
@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work)
 }
 
 static struct socket *xs_create_sock(struct rpc_xprt *xprt,
-		struct sock_xprt *transport, int family, int type, int protocol)
+		struct sock_xprt *transport, int family, int type,
+		int protocol, bool reuseport)
 {
 	struct socket *sock;
 	int err;
@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
 	}
 	xs_reclassify_socket(family, sock);
 
+	if (reuseport)
+		xs_sock_set_reuseport(sock);
+
 	err = xs_bind(transport, sock);
 	if (err) {
 		sock_release(sock);
@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
 	/* Start by resetting any existing state */
 	xs_reset_transport(transport);
 	sock = xs_create_sock(xprt, transport,
-			xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+			xs_addr(xprt)->sa_family, SOCK_DGRAM,
+			IPPROTO_UDP, false);
 	if (IS_ERR(sock))
 		goto out;
 
@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		sk->sk_allocation = GFP_ATOMIC;
 
 		/* socket options */
-		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
 		sock_reset_flag(sk, SOCK_LINGER);
 		tcp_sk(sk)->linger2 = 0;
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
 	switch (ret) {
 	case 0:
+		xs_set_srcport(transport, sock);
 	case -EINPROGRESS:
 		/* SYN_SENT! */
 		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	if (!sock) {
 		clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
 		sock = xs_create_sock(xprt, transport,
-				xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
+				xs_addr(xprt)->sa_family, SOCK_STREAM,
+				IPPROTO_TCP, true);
 		if (IS_ERR(sock)) {
 			status = PTR_ERR(sock);
 			goto out;
-- 
2.1.0


  reply	other threads:[~2015-02-09 22:48 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-09 22:47 [PATCH v3 00/15] Fix TCP connection port number reuse in NFSv3 Trond Myklebust
2015-02-09 22:47 ` Trond Myklebust [this message]
2015-02-09 22:47   ` [PATCH v3 02/15] SUNRPC: Handle EADDRINUSE on connect Trond Myklebust
2015-02-09 22:47     ` [PATCH v3 03/15] SUNRPC: Do not clear the source port in xs_reset_transport Trond Myklebust
2015-02-09 22:48       ` [PATCH v3 04/15] SUNRPC: Ensure xs_reset_transport() resets the close connection flags Trond Myklebust
2015-02-09 22:48         ` [PATCH v3 05/15] SUNRPC: Add helpers to prevent socket create from racing Trond Myklebust
2015-02-09 22:48           ` [PATCH v3 06/15] SUNRPC: TCP/UDP always close the old socket before reconnecting Trond Myklebust
2015-02-09 22:48             ` [PATCH v3 07/15] SUNRPC: Remove TCP client connection reset hack Trond Myklebust
2015-02-09 22:48               ` [PATCH v3 08/15] SUNRPC: Remove TCP socket linger code Trond Myklebust
2015-02-09 22:48                 ` [PATCH v3 09/15] SUNRPC: Cleanup to remove remaining uses of XPRT_CONNECTION_ABORT Trond Myklebust
2015-02-09 22:48                   ` [PATCH v3 10/15] SUNRPC: Ensure xs_tcp_shutdown() requests a full close of the connection Trond Myklebust
2015-02-09 22:48                     ` [PATCH v3 11/15] SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release Trond Myklebust
2015-02-09 22:48                       ` [PATCH v3 12/15] SUNRPC: Remove the redundant XPRT_CONNECTION_CLOSE flag Trond Myklebust
2015-02-09 22:48                         ` [PATCH v3 13/15] SUNRPC: Handle connection reset more efficiently Trond Myklebust
2015-02-09 22:48                           ` [PATCH v3 14/15] SUNRPC: Define xs_tcp_fin_timeout only if CONFIG_SUNRPC_DEBUG Trond Myklebust
2015-02-09 22:48                             ` [PATCH v3 15/15] SUNRPC: Fix stupid typo in xs_sock_set_reuseport Trond Myklebust
2015-02-10 15:54                       ` [PATCH v3 11/15] SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release Anna Schumaker
2015-02-10 16:10                         ` Trond Myklebust
2016-09-29 18:52           ` [PATCH v3 05/15] SUNRPC: Add helpers to prevent socket create from racing Olga Kornievskaia
2016-09-29 20:20             ` Olga Kornievskaia

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1423522091-35365-2-git-send-email-trond.myklebust@primarydata.com \
    --to=trond.myklebust@primarydata.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.