* [MPTCP] [PATCH v1 4/4] mptcp: avoid acquiring the msk lock in mptcp_finish_connect()
@ 2019-12-09 23:52 Paolo Abeni
0 siblings, 0 replies; only message in thread
From: Paolo Abeni @ 2019-12-09 23:52 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 12402 bytes --]
Move the mptcp_copy_inaddrs() into mptcp_finish_connect(), so that
the msk lock is no longer required. Just document the unlocked access
with WRITE_ONCE().
Since we now always acquire the msk and ssk locks in that order, we can also
avoid the msk lock / subflow lookup() / get() / msk unlock() / subflow op /
subflow put dance in most ops - simply keep the msk lock held the whole time.
Additionally we can simplify mptcp_getname(), as now the address information
is always available in the msk.
RFC -> v1:
- move mptcp_copy_inaddrs() into finish_connect()
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
net/mptcp/protocol.c | 211 +++++++++++++++++--------------------------
1 file changed, 83 insertions(+), 128 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a11352e5e322..70090366adae 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -36,10 +36,10 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
}
/* if msk has a single subflow, and the mp_capable handshake is failed,
- * get a reference and return it.
+ * return it.
* Otherwise returns NULL
*/
-static struct socket *__mptcp_tcp_fallback_get(const struct mptcp_sock *msk)
+static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
{
struct socket *ssock = __mptcp_nmpc_socket(msk);
@@ -48,7 +48,6 @@ static struct socket *__mptcp_tcp_fallback_get(const struct mptcp_sock *msk)
if (!ssock || tcp_sk(ssock->sk)->is_mptcp)
return NULL;
- sock_hold(ssock->sk);
return ssock;
}
@@ -70,28 +69,23 @@ static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
return ((struct sock *)msk)->sk_state == TCP_CLOSE;
}
-static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk, int state)
+static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
int err;
- lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (ssock)
goto set_state;
- if (!__mptcp_can_create_subflow(msk)) {
- ssock = ERR_PTR(-EINVAL);
- goto release;
- }
+ if (!__mptcp_can_create_subflow(msk))
+ return ERR_PTR(-EINVAL);
err = mptcp_subflow_create_socket(sk, &ssock);
- if (err) {
- ssock = ERR_PTR(err);
- goto release;
- }
+ if (err)
+ return ERR_PTR(err);
msk->subflow = ssock;
subflow = mptcp_subflow_ctx(ssock->sk);
@@ -100,12 +94,8 @@ static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk, int state)
subflow->request_version = 1; /* only v1 supported */
set_state:
- sock_hold(ssock->sk);
if (state != TCP_MAX_STATES)
inet_sk_state_store(sk, state);
-
-release:
- release_sock(sk);
return ssock;
}
@@ -273,12 +263,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EOPNOTSUPP;
lock_sock(sk);
- ssock = __mptcp_tcp_fallback_get(msk);
+ ssock = __mptcp_tcp_fallback(msk);
if (ssock) {
- release_sock(sk);
pr_debug("fallback passthrough");
ret = sock_sendmsg(ssock, msg);
- sock_put(ssock->sk);
+ release_sock(sk);
return ret;
}
@@ -376,13 +365,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
long timeo;
lock_sock(sk);
- ssock = __mptcp_tcp_fallback_get(msk);
+ ssock = __mptcp_tcp_fallback(msk);
if (ssock) {
- release_sock(sk);
pr_debug("fallback-read subflow=%p",
mptcp_subflow_ctx(ssock->sk));
copied = sock_recvmsg(ssock, msg, flags);
- sock_put(ssock->sk);
+ release_sock(sk);
return copied;
}
@@ -708,25 +696,26 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
{
struct mptcp_sock *msk = mptcp_sk(sk);
char __kernel *optval;
+ int ret = -EOPNOTSUPP;
struct socket *ssock;
- int ret;
/* will be treated as __user in tcp_setsockopt */
optval = (char __kernel __force *)uoptval;
pr_debug("msk=%p", msk);
- ssock = mptcp_socket_create_get(msk, TCP_MAX_STATES);
+
+ /* @@ the meaning of setsockopt() when the socket is connected and
+ * there are multiple subflows is not defined.
+ */
+ lock_sock(sk);
+ ssock = __mptcp_socket_create(msk, TCP_MAX_STATES);
if (!IS_ERR(ssock)) {
pr_debug("subflow=%p", ssock->sk);
ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
- sock_put(ssock->sk);
- return ret;
}
+ release_sock(sk);
- /* @@ the meaning of setsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
- */
- return -EOPNOTSUPP;
+ return ret;
}
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
@@ -734,27 +723,28 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
{
struct mptcp_sock *msk = mptcp_sk(sk);
char __kernel *optval;
+ int ret = -EOPNOTSUPP;
int __kernel *option;
struct socket *ssock;
- int ret;
/* will be treated as __user in tcp_getsockopt */
optval = (char __kernel __force *)uoptval;
option = (int __kernel __force *)uoption;
pr_debug("msk=%p", msk);
- ssock = mptcp_socket_create_get(msk, TCP_MAX_STATES);
+ lock_sock(sk);
+
+ /* @@ the meaning of getsockopt() when the socket is connected and
+ * there are multiple subflows is not defined.
+ */
+ ssock = __mptcp_socket_create(msk, TCP_MAX_STATES);
if (!IS_ERR(ssock)) {
pr_debug("subflow=%p", ssock->sk);
ret = kernel_getsockopt(ssock, level, optname, optval, option);
- sock_put(ssock->sk);
- return ret;
}
+ release_sock(sk);
- /* @@ the meaning of getsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
- */
- return -EOPNOTSUPP;
+ return ret;
}
static int mptcp_get_port(struct sock *sk, unsigned short snum)
@@ -773,57 +763,34 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
void mptcp_finish_connect(struct sock *ssk)
{
struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
struct sock *sk;
+ u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
- sk = subflow->conn;
-
- if (subflow->mp_capable) {
- struct mptcp_sock *msk = mptcp_sk(sk);
- u64 ack_seq;
+ if (!subflow->mp_capable)
+ return;
- /* sk (new subflow socket) is already locked, but we need
- * to lock the parent (mptcp) socket now to add the tcp socket
- * to the subflow list.
- *
- * From lockdep point of view, this creates an ABBA type
- * deadlock: Normally (sendmsg, recvmsg, ..), we lock the mptcp
- * socket, then acquire a subflow lock.
- * Here we do the reverse: "subflow lock, then mptcp lock".
- *
- * Its alright to do this here, because this subflow is not yet
- * on the mptcp sockets subflow list.
- *
- * IOW, if another CPU has this mptcp socket locked, it cannot
- * acquire this particular subflow, because subflow->sk isn't
- * on msk->conn_list.
- *
- * This function can be called either from backlog processing
- * (BH will be enabled) or from softirq, so we need to use BH
- * locking scheme.
- */
- local_bh_disable();
- bh_lock_sock_nested(sk);
+ sk = subflow->conn;
+ msk = mptcp_sk(sk);
- msk->remote_key = subflow->remote_key;
- msk->local_key = subflow->local_key;
- msk->token = subflow->token;
- mptcp_copy_inaddrs(sk, ssk);
+ pr_debug("msk=%p, token=%u", sk, subflow->token);
- pr_debug("msk=%p, token=%u", msk, msk->token);
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+ ack_seq++;
+ subflow->map_seq = ack_seq;
+ subflow->map_subflow_seq = 1;
+ subflow->rel_write_seq = 1;
- mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
- msk->write_seq = subflow->idsn + 1;
- ack_seq++;
- msk->ack_seq = ack_seq;
- msk->can_ack = 1;
- subflow->map_seq = ack_seq;
- subflow->map_subflow_seq = 1;
- subflow->rel_write_seq = 1;
- bh_unlock_sock(sk);
- local_bh_enable();
- }
- inet_sk_state_store(sk, TCP_ESTABLISHED);
+ /* the socket is not connected yet, no msk/subflow ops can access/race
+ * accessing the field below
+ */
+ WRITE_ONCE(msk->remote_key, subflow->remote_key);
+ WRITE_ONCE(msk->local_key, subflow->local_key);
+ WRITE_ONCE(msk->token, subflow->token);
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->ack_seq, ack_seq);
+ WRITE_ONCE(msk->can_ack, 1);
}
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -868,12 +835,19 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct socket *ssock;
int err;
- ssock = mptcp_socket_create_get(msk, TCP_MAX_STATES);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ lock_sock(sock->sk);
+ ssock = __mptcp_socket_create(msk, TCP_MAX_STATES);
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
+ goto unlock;
+ }
err = ssock->ops->bind(ssock, uaddr, addr_len);
- sock_put(ssock->sk);
+ if (!err)
+ mptcp_copy_inaddrs(sock->sk, ssock->sk);
+
+unlock:
+ release_sock(sock->sk);
return err;
}
@@ -884,9 +858,12 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct socket *ssock;
int err;
- ssock = mptcp_socket_create_get(msk, TCP_SYN_SENT);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ lock_sock(sock->sk);
+ ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
+ goto unlock;
+ }
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -897,33 +874,19 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
#endif
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
-
- /* should we keep msk socket lock instead? and release it this
- * latter state update */
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- sock_put(ssock->sk);
+ mptcp_copy_inaddrs(sock->sk, ssock->sk);
+
+unlock:
+ release_sock(sock->sk);
return err;
}
static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
int peer, int af)
{
- struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
int ret;
- lock_sock(sock->sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock)
- sock_hold(ssock->sk);
- release_sock(sock->sk);
- if (ssock) {
- pr_debug("subflow=%p", ssock->sk);
- ret = ssock->ops->getname(ssock, uaddr, peer);
- sock_put(ssock->sk);
- return ret;
- }
-
switch (af) {
case AF_INET:
ret = inet_getname(sock, uaddr, peer);
@@ -987,16 +950,20 @@ static int mptcp_listen(struct socket *sock, int backlog)
pr_debug("msk=%p", msk);
- ssock = mptcp_socket_create_get(msk, TCP_LISTEN);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ lock_sock(sock->sk);
+ ssock = __mptcp_socket_create(msk, TCP_LISTEN);
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
+ goto unlock;
+ }
err = ssock->ops->listen(ssock, backlog);
-
- /* should we keep msk socket lock instead? and release it this
- * latter state update */
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- sock_put(ssock->sk);
+ if (!err)
+ mptcp_copy_inaddrs(sock->sk, ssock->sk);
+
+unlock:
+ release_sock(sock->sk);
return err;
}
@@ -1065,10 +1032,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (ssock) {
- sock_hold(ssock->sk);
- release_sock(sk);
mask = ssock->ops->poll(file, ssock, wait);
- sock_put(ssock->sk);
+ release_sock(sk);
return mask;
}
@@ -1092,7 +1057,6 @@ static int mptcp_shutdown(struct socket *sock, int how)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct mptcp_subflow_context *subflow;
- struct socket *ssock;
int ret = 0;
pr_debug("sk=%p, how=%d", msk, how);
@@ -1102,15 +1066,6 @@ static int mptcp_shutdown(struct socket *sock, int how)
if (how == SHUT_WR || how == SHUT_RDWR)
inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
- ssock = __mptcp_tcp_fallback_get(msk);
- if (ssock) {
- release_sock(sock->sk);
- pr_debug("subflow=%p", ssock->sk);
- ret = kernel_sock_shutdown(ssock, how);
- sock_put(ssock->sk);
- return ret;
- }
-
how++;
if ((how & ~SHUTDOWN_MASK) || !how) {
--
2.21.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-12-09 23:52 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-09 23:52 [MPTCP] [PATCH v1 4/4] mptcp: avoid acquiring the msk lock in mptcp_finish_connect() Paolo Abeni
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.