All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
To: edumazet@google.com, netdev@vger.kernel.org
Cc: Florian Westphal <fw@strlen.de>,
	cpaasch@apple.com, pabeni@redhat.com,
	peter.krystad@linux.intel.com, dcaratti@redhat.com,
	matthieu.baerts@tessares.net
Subject: [RFC PATCH net-next 21/33] mptcp: add and use mptcp_subflow_hold
Date: Mon, 17 Jun 2019 15:57:56 -0700	[thread overview]
Message-ID: <20190617225808.665-22-mathew.j.martineau@linux.intel.com> (raw)
In-Reply-To: <20190617225808.665-1-mathew.j.martineau@linux.intel.com>

From: Florian Westphal <fw@strlen.de>

Subflow sockets already have their lifetime managed by RCU, so we can
switch from sock_hold() to refcount_inc_not_zero() and, when the
refcount has already dropped to zero, skip that socket as if it had
not been found in the mptcp subflow list.

This is required to remove the synchronize_rcu() call from mptcp_close().

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/protocol.c | 104 +++++++++++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 38 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c00e837a1766..0db4099d9c13 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -24,14 +24,35 @@ static inline bool before64(__u64 seq1, __u64 seq2)
 
 #define after64(seq2, seq1)	before64(seq1, seq2)
 
+static bool mptcp_subflow_hold(struct subflow_context *subflow)
+{
+	struct sock *sk = mptcp_subflow_tcp_socket(subflow)->sk;
+
+	return refcount_inc_not_zero(&sk->sk_refcnt);
+}
+
+static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
+{
+	struct subflow_context *subflow;
+
+	rcu_read_lock();
+	mptcp_for_each_subflow(msk, subflow) {
+		if (mptcp_subflow_hold(subflow)) {
+			rcu_read_unlock();
+			return mptcp_subflow_tcp_socket(subflow)->sk;
+		}
+	}
+
+	rcu_read_unlock();
+	return NULL;
+}
+
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	int mss_now, size_goal, poffset, ret;
 	struct mptcp_ext *mpext = NULL;
-	struct subflow_context *subflow;
 	struct page *page = NULL;
-	struct hlist_node *node;
 	struct sk_buff *skb;
 	struct sock *ssk;
 	size_t psize;
@@ -42,20 +63,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		return sock_sendmsg(msk->subflow, msg);
 	}
 
-	rcu_read_lock();
-	node = rcu_dereference(hlist_first_rcu(&msk->conn_list));
-	subflow = hlist_entry(node, struct subflow_context, node);
-	ssk = mptcp_subflow_tcp_socket(subflow)->sk;
-	sock_hold(ssk);
-	rcu_read_unlock();
+	ssk = mptcp_subflow_get_ref(msk);
+	if (!ssk)
+		return -ENOTCONN;
 
 	if (!msg_data_left(msg)) {
 		pr_debug("empty send");
-		ret = sock_sendmsg(mptcp_subflow_tcp_socket(subflow), msg);
+		ret = sock_sendmsg(ssk->sk_socket, msg);
 		goto put_out;
 	}
 
-	pr_debug("conn_list->subflow=%p", subflow);
+	pr_debug("conn_list->subflow=%p", ssk);
 
 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) {
 		ret = -ENOTSUPP;
@@ -293,7 +311,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct subflow_context *subflow;
 	struct mptcp_read_arg arg;
-	struct hlist_node *node;
 	read_descriptor_t desc;
 	struct tcp_sock *tp;
 	struct sock *ssk;
@@ -306,13 +323,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		return sock_recvmsg(msk->subflow, msg, flags);
 	}
 
-	rcu_read_lock();
-	node = rcu_dereference(hlist_first_rcu(&msk->conn_list));
-	subflow = hlist_entry(node, struct subflow_context, node);
-	ssk = mptcp_subflow_tcp_socket(subflow)->sk;
-	sock_hold(ssk);
-	rcu_read_unlock();
+	ssk = mptcp_subflow_get_ref(msk);
+	if (!ssk)
+		return -ENOTCONN;
 
+	subflow = subflow_ctx(ssk);
 	tp = tcp_sk(ssk);
 
 	lock_sock(sk);
@@ -778,8 +793,6 @@ static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
 			 int peer)
 {
 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
-	struct subflow_context *subflow;
-	struct hlist_node *node;
 	struct sock *ssk;
 	int ret;
 
@@ -794,14 +807,11 @@ static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
 	 * is connected and there are multiple subflows is not defined.
 	 * For now just use the first subflow on the list.
 	 */
-	rcu_read_lock();
-	node = rcu_dereference(hlist_first_rcu(&msk->conn_list));
-	subflow = hlist_entry(node, struct subflow_context, node);
-	ssk = mptcp_subflow_tcp_socket(subflow)->sk;
-	sock_hold(ssk);
-	rcu_read_unlock();
+	ssk = mptcp_subflow_get_ref(msk);
+	if (!ssk)
+		return -ENOTCONN;
 
-	ret = inet_getname(mptcp_subflow_tcp_socket(subflow), uaddr, peer);
+	ret = inet_getname(ssk->sk_socket, uaddr, peer);
 	sock_put(ssk);
 	return ret;
 }
@@ -837,26 +847,44 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 			   struct poll_table_struct *wait)
 {
-	const struct mptcp_sock *msk;
 	struct subflow_context *subflow;
+	const struct mptcp_sock *msk;
 	struct sock *sk = sock->sk;
-	struct hlist_node *node;
-	struct sock *ssk;
-	__poll_t ret;
+	__poll_t ret = 0;
+	unsigned int i;
 
 	msk = mptcp_sk(sk);
 	if (msk->subflow)
 		return tcp_poll(file, msk->subflow, wait);
 
-	rcu_read_lock();
-	node = rcu_dereference(hlist_first_rcu(&msk->conn_list));
-	subflow = hlist_entry(node, struct subflow_context, node);
-	ssk = mptcp_subflow_tcp_socket(subflow)->sk;
-	sock_hold(ssk);
-	rcu_read_unlock();
+	i = 0;
+	for (;;) {
+		struct subflow_context *tmp = NULL;
+		int j = 0;
+
+		rcu_read_lock();
+		mptcp_for_each_subflow(msk, subflow) {
+			if (j < i) {
+				j++;
+				continue;
+			}
+
+			if (!mptcp_subflow_hold(subflow))
+				continue;
+
+			tmp = subflow;
+			i++;
+			break;
+		}
+		rcu_read_unlock();
+
+		if (!tmp)
+			break;
+
+		ret |= tcp_poll(file, mptcp_subflow_tcp_socket(tmp), wait);
+		sock_put(mptcp_subflow_tcp_socket(tmp)->sk);
+	}
 
-	ret = tcp_poll(file, ssk->sk_socket, wait);
-	sock_put(ssk);
 	return ret;
 }
 
-- 
2.22.0


  parent reply	other threads:[~2019-06-17 22:59 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-17 22:57 [RFC PATCH net-next 00/33] Multipath TCP Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 01/33] tcp: Add MPTCP option number Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 02/33] tcp: Define IPPROTO_MPTCP Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 03/33] mptcp: Add MPTCP socket stubs Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 04/33] mptcp: Handle MPTCP TCP options Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 05/33] mptcp: Associate MPTCP context with TCP socket Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 06/33] tcp: Expose tcp struct and routine for MPTCP Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 07/33] mptcp: Handle MP_CAPABLE options for outgoing connections Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 08/33] mptcp: add mptcp_poll Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 09/33] tcp, ulp: Add clone operation to tcp_ulp_ops Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 10/33] mptcp: Create SUBFLOW socket for incoming connections Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 11/33] mptcp: Add key generation and token tree Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 12/33] mptcp: Add shutdown() socket operation Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 13/33] mptcp: Add setsockopt()/getsockopt() socket operations Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 14/33] tcp: clean ext on tx recycle Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 15/33] mptcp: Add MPTCP to skb extensions Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 16/33] tcp: Prevent coalesce/collapse when skb has MPTCP extensions Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 17/33] tcp: Export low-level TCP functions Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 18/33] mptcp: Write MPTCP DSS headers to outgoing data packets Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 19/33] mptcp: Implement MPTCP receive path Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 20/33] mptcp: Make connection_list a real list of subflows Mat Martineau
2019-06-17 22:57 ` Mat Martineau [this message]
2019-06-17 22:57 ` [RFC PATCH net-next 22/33] mptcp: add basic kselftest program Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 23/33] mptcp: selftests: switch to netns+veth based tests Mat Martineau
2019-06-17 22:57 ` [RFC PATCH net-next 24/33] mptcp: selftests: Add capture option Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 25/33] mptcp: use sk_page_frag() in sendmsg Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 26/33] mptcp: sendmsg() do spool all the provided data Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 27/33] mptcp: allow collapsing consecutive sendpages on the same substream Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 28/33] tcp: Check for filled TCP option space before SACK Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 29/33] mptcp: accept: don't leak mptcp socket structure Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 30/33] mptcp: switch sublist to mptcp socket lock protection Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 31/33] mptcp: Add path manager interface Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 32/33] mptcp: Add ADD_ADDR handling Mat Martineau
2019-06-17 22:58 ` [RFC PATCH net-next 33/33] mptcp: Add handling of incoming MP_JOIN requests Mat Martineau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190617225808.665-22-mathew.j.martineau@linux.intel.com \
    --to=mathew.j.martineau@linux.intel.com \
    --cc=cpaasch@apple.com \
    --cc=dcaratti@redhat.com \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=matthieu.baerts@tessares.net \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=peter.krystad@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.