All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Bernard Metzler" <BMT@zurich.ibm.com>
To: "Stefan Metzmacher" <metze@samba.org>
Cc: "linux-rdma" <linux-rdma@vger.kernel.org>
Subject: Re: [PATCH 20/31] rdma/siw: implement non-blocking connect.
Date: Tue, 11 May 2021 12:31:08 +0000	[thread overview]
Message-ID: <OFBC50BE05.41477A2E-ON002586D2.0044C4E8-002586D2.0044C4EE@notes.na.collabserv.com> (raw)
In-Reply-To: <80a82b4d3029d1a63042910e3ac4c3731561967e.1620343860.git.metze@samba.org>

-----"Stefan Metzmacher" <metze@samba.org> wrote: -----

>To: "Bernard Metzler" <bmt@zurich.ibm.com>
>From: "Stefan Metzmacher" <metze@samba.org>
>Date: 05/07/2021 01:39AM
>Cc: linux-rdma@vger.kernel.org, "Stefan Metzmacher" <metze@samba.org>
>Subject: [EXTERNAL] [PATCH 20/31] rdma/siw: implement non-blocking
>connect.
>
>This is very important in order to prevent deadlocks.
>
>The RDMA application layer expects rdma_connect() to be non-blocking
>as the completion is handled via RDMA_CM_EVENT_ESTABLISHED and
>other async events. It's not unlikely to hold a lock during
>the rdma_connect() call.
>
>Without out this a connection attempt to a non-existing/reachable
>server block until the very long tcp timeout hits.
>The application layer had no chance to have its own timeout handler
>as that would just deadlock with the already blocking rdma_connect().
>
>Fixes: 6c52fdc244b5 ("rdma/siw: connection management")
>Signed-off-by: Stefan Metzmacher <metze@samba.org>
>Cc: Bernard Metzler <bmt@zurich.ibm.com>
>Cc: linux-rdma@vger.kernel.org
>---
> drivers/infiniband/sw/siw/siw_cm.c | 114
>+++++++++++++++++++++--------
> drivers/infiniband/sw/siw/siw_cm.h |   1 +
> 2 files changed, 85 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/infiniband/sw/siw/siw_cm.c
>b/drivers/infiniband/sw/siw/siw_cm.c
>index cf0f881c6793..9a550f040678 100644
>--- a/drivers/infiniband/sw/siw/siw_cm.c
>+++ b/drivers/infiniband/sw/siw/siw_cm.c
>@@ -37,6 +37,7 @@ static void siw_cm_llp_write_space(struct sock *s);
> static void siw_cm_llp_error_report(struct sock *s);
> static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type
>reason,
> 			 int status);
>+static void siw_connected(struct siw_cep *cep);
> 
> static void siw_sk_assign_cm_upcalls(struct sock *sk)
> {
>@@ -1141,6 +1142,10 @@ static void siw_cm_work_handler(struct
>work_struct *w)
> 		siw_accept_newconn(cep);
> 		break;
> 
>+	case SIW_CM_WORK_CONNECTED:
>+		siw_connected(cep);
>+		break;
>+
> 	case SIW_CM_WORK_READ_MPAHDR:
> 		if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
> 			if (cep->listen_cep) {
>@@ -1306,6 +1311,7 @@ static void siw_cm_llp_data_ready(struct sock
>*sk)
> 	switch (cep->state) {
> 	case SIW_EPSTATE_RDMA_MODE:
> 	case SIW_EPSTATE_LISTENING:
>+	case SIW_EPSTATE_CONNECTING:
> 		break;
> 
> 	case SIW_EPSTATE_AWAIT_MPAREQ:
>@@ -1359,12 +1365,26 @@ static void siw_cm_llp_state_change(struct
>sock *sk)
> 
> 	switch (sk->sk_state) {
> 	case TCP_ESTABLISHED:
>-		/*
>-		 * handle accepting socket as special case where only
>-		 * new connection is possible
>-		 */
>-		siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
>-		break;
>+		if (cep->state == SIW_EPSTATE_CONNECTING) {
>+			/*
>+			 * handle accepting socket as special case where only
>+			 * new connection is possible
>+			 */
>+			siw_cm_queue_work(cep, SIW_CM_WORK_CONNECTED);
>+			break;
>+
>+		} else if (cep->state == SIW_EPSTATE_LISTENING) {
>+			/*
>+			 * handle accepting socket as special case where only
>+			 * new connection is possible
>+			 */
>+			siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
>+			break;
>+		}
>+		siw_dbg_cep(cep,
>+			    "unexpected socket state %d with cep state %d\n",
>+			    sk->sk_state, cep->state);
>+		/* fall through */
> 
> 	case TCP_CLOSE:
> 	case TCP_CLOSE_WAIT:
>@@ -1383,7 +1403,7 @@ static void siw_cm_llp_state_change(struct sock
>*sk)
> static int kernel_bindconnect(struct socket *s, struct sockaddr
>*laddr,
> 			      struct sockaddr *raddr, bool afonly)
> {
>-	int rv, flags = 0;
>+	int rv;
> 	size_t size = laddr->sa_family == AF_INET ?
> 		sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
> 
>@@ -1402,7 +1422,7 @@ static int kernel_bindconnect(struct socket *s,
>struct sockaddr *laddr,
> 	if (rv < 0)
> 		return rv;
> 
>-	rv = kernel_connect(s, raddr, size, flags);
>+	rv = kernel_connect(s, raddr, size, O_NONBLOCK);
> 
> 	return rv < 0 ? rv : 0;
> }
>@@ -1547,36 +1567,27 @@ int siw_connect(struct iw_cm_id *id, struct
>iw_cm_conn_param *params)
> 		goto error;
> 	}
> 
>-	/*
>-	 * NOTE: For simplification, connect() is called in blocking
>-	 * mode. Might be reconsidered for async connection setup at
>-	 * TCP level.
>-	 */
> 	rv = kernel_bindconnect(s, laddr, raddr, id->afonly);
>+	if (rv == -EINPROGRESS) {
>+		siw_dbg_qp(qp, "kernel_bindconnect: EINPROGRESS\n");
>+		rv = 0;
>+	}
> 	if (rv != 0) {
> 		siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
> 		goto error;
> 	}
>-	if (siw_tcp_nagle == false)
>-		tcp_sock_set_nodelay(s->sk);
>-
>-	cep->state = SIW_EPSTATE_AWAIT_MPAREP;
> 
>-	rv = siw_send_mpareqrep(cep, cep->mpa.pdata,
>-				cep->mpa.hdr.params.pd_len);
> 	/*
>-	 * Reset private data.
>+	 * The rest will be done by siw_connected()

Please use more concise language, giving some details.
The 'rest' and 'everything' refers to some state of code
understanding we cannot assume for every reader ;)


>+	 *
>+	 * siw_cm_llp_state_change() will detect
>+	 * TCP_ESTABLISHED and schedules SIW_CM_WORK_CONNECTED,
>+	 * which will finally call siw_connected().
>+	 *
>+	 * As siw_cm_llp_state_change() handles everything
>+	 * siw_cm_llp_data_ready() can be a noop for
>+	 * SIW_EPSTATE_CONNECTING.
> 	 */
>-	if (cep->mpa.hdr.params.pd_len) {
>-		cep->mpa.hdr.params.pd_len = 0;
>-		kfree(cep->mpa.pdata);
>-		cep->mpa.pdata = NULL;
>-	}
>-
>-	if (rv < 0) {
>-		goto error;
>-	}
>-
> 	siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp));
> 	siw_cep_set_free(cep);
> 	return 0;
>@@ -1604,6 +1615,49 @@ int siw_connect(struct iw_cm_id *id, struct
>iw_cm_conn_param *params)
> 	return rv;
> }
> 
>+static void siw_connected(struct siw_cep *cep)
>+{
>+	struct siw_qp *qp = cep->qp;
>+	struct socket *s = cep->sock;
>+	int rv = -ECONNABORTED;
>+
>+	/*
>+	 * already called with
>+	 * siw_cep_set_inuse(cep);
>+	 */
>+
>+	if (cep->state != SIW_EPSTATE_CONNECTING)
>+		goto error;
>+
>+	if (siw_tcp_nagle == false)
>+		tcp_sock_set_nodelay(s->sk);
>+
>+	cep->state = SIW_EPSTATE_AWAIT_MPAREP;
>+
>+	rv = siw_send_mpareqrep(cep, cep->mpa.pdata,
>+				cep->mpa.hdr.params.pd_len);
>+	/*
>+	 * Reset private data.
>+	 */
>+	if (cep->mpa.hdr.params.pd_len) {
>+		cep->mpa.hdr.params.pd_len = 0;
>+		kfree(cep->mpa.pdata);
>+		cep->mpa.pdata = NULL;
>+	}
>+
>+	if (rv < 0) {
>+		goto error;
>+	}
>+
>+	siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp));
>+	return;
>+
>+error:
>+	siw_dbg_cep(cep, "[QP %u]: exit, error %d\n", qp_id(qp), rv);
>+	siw_qp_cm_drop(qp, 1);
>+	return;
>+}
>+
> /*
>  * siw_accept - Let SoftiWARP accept an RDMA connection request
>  *
>diff --git a/drivers/infiniband/sw/siw/siw_cm.h
>b/drivers/infiniband/sw/siw/siw_cm.h
>index 4f6219bd746b..62c9947999ac 100644
>--- a/drivers/infiniband/sw/siw/siw_cm.h
>+++ b/drivers/infiniband/sw/siw/siw_cm.h
>@@ -78,6 +78,7 @@ struct siw_cep {
> 
> enum siw_work_type {
> 	SIW_CM_WORK_ACCEPT = 1,
>+	SIW_CM_WORK_CONNECTED,
> 	SIW_CM_WORK_READ_MPAHDR,
> 	SIW_CM_WORK_CLOSE_LLP, /* close socket */
> 	SIW_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
>-- 
>2.25.1
>
>


  parent reply	other threads:[~2021-05-11 12:31 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-06 23:36 [PATCH 00/31] rdma/siw: fix a lot of deadlocks and use after free bugs Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 01/31] rdma/siw: fix warning in siw_proc_send() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 02/31] rdma/siw: call smp_mb() after mem->stag_valid = 0 in siw_invalidate_stag() too Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 03/31] rdma/siw: remove superfluous siw_cep_put() from siw_connect() error path Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 04/31] rdma/siw: let siw_accept() deferr RDMA_MODE until EVENT_ESTABLISHED Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 05/31] rdma/siw: make use of kernel_{bind,connect,listen}() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 06/31] rdma/siw: make siw_cm_upcall() a noop without valid 'id' Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 07/31] rdma/siw: split out a __siw_cep_terminate_upcall() function Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 08/31] rdma/siw: use __siw_cep_terminate_upcall() for indirect SIW_CM_WORK_CLOSE_LLP Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 09/31] rdma/siw: use __siw_cep_terminate_upcall() for SIW_CM_WORK_PEER_CLOSE Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 10/31] rdma/siw: use __siw_cep_terminate_upcall() for SIW_CM_WORK_MPATIMEOUT Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 11/31] rdma/siw: introduce SIW_EPSTATE_ACCEPTING/REJECTING for rdma_accept/rdma_reject Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 12/31] rdma/siw: add some debugging of state and sk_state to the teardown process Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 13/31] rdma/siw: handle SIW_EPSTATE_CONNECTING in __siw_cep_terminate_upcall() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 14/31] rdma/siw: let siw_connect() set AWAIT_MPAREP before siw_send_mpareqrep() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 15/31] rdma/siw: create a temporary copy of private data Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 16/31] rdma/siw: use error and out logic at the end of siw_connect() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 17/31] rdma/siw: start mpa timer before calling siw_send_mpareqrep() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 18/31] rdma/siw: call the blocking kernel_bindconnect() just before siw_send_mpareqrep() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 19/31] rdma/siw: split out a __siw_cep_close() function Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 20/31] rdma/siw: implement non-blocking connect Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 21/31] rdma/siw: let siw_listen_address() call siw_cep_alloc() first Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 22/31] rdma/siw: let siw_listen_address() call siw_cep_set_inuse() early Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 23/31] rdma/siw: make use of __siw_cep_close() in siw_accept() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 24/31] rdma/siw: do the full disassociation of cep and qp in siw_qp_llp_close() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 25/31] rdma/siw: fix double siw_cep_put() in siw_cm_work_handler() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 26/31] rdma/siw: make use of __siw_cep_close() " Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 27/31] rdma/siw: fix the "close" logic in siw_qp_cm_drop() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 28/31] rdma/siw: make use of __siw_cep_close() " Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 29/31] rdma/siw: make use of __siw_cep_close() in siw_reject() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 30/31] rdma/siw: make use of __siw_cep_close() in siw_listen_address() Stefan Metzmacher
2021-05-06 23:36 ` [PATCH 31/31] rdma/siw: make use of __siw_cep_close() in siw_drop_listeners() Stefan Metzmacher
2021-05-07 12:08 ` [PATCH 00/31] rdma/siw: fix a lot of deadlocks and use after free bugs Bernard Metzler
2021-05-26 15:43   ` Stefan Metzmacher
2021-05-28 14:35   ` Bernard Metzler
2021-05-07 12:15 ` [PATCH 01/31] rdma/siw: fix warning in siw_proc_send() Bernard Metzler
2021-05-07 13:52 ` [PATCH 03/31] rdma/siw: remove superfluous siw_cep_put() from siw_connect() error path Bernard Metzler
2021-05-11 11:31 ` [PATCH 02/31] rdma/siw: call smp_mb() after mem->stag_valid = 0 in siw_invalidate_stag() too Bernard Metzler
2021-05-11 11:36 ` [PATCH 05/31] rdma/siw: make use of kernel_{bind,connect,listen}() Bernard Metzler
2021-05-11 11:38 ` [PATCH 04/31] rdma/siw: let siw_accept() deferr RDMA_MODE until EVENT_ESTABLISHED Bernard Metzler
2021-05-11 11:42 ` [PATCH 06/31] rdma/siw: make siw_cm_upcall() a noop without valid 'id' Bernard Metzler
2021-05-11 11:55 ` [PATCH 07/31] rdma/siw: split out a __siw_cep_terminate_upcall() function Bernard Metzler
2021-05-11 11:56 ` [PATCH 08/31] rdma/siw: use __siw_cep_terminate_upcall() for indirect SIW_CM_WORK_CLOSE_LLP Bernard Metzler
2021-05-11 12:00 ` [PATCH 09/31] rdma/siw: use __siw_cep_terminate_upcall() for SIW_CM_WORK_PEER_CLOSE Bernard Metzler
2021-05-11 12:01 ` [PATCH 10/31] rdma/siw: use __siw_cep_terminate_upcall() for SIW_CM_WORK_MPATIMEOUT Bernard Metzler
2021-05-11 12:03 ` [PATCH 11/31] rdma/siw: introduce SIW_EPSTATE_ACCEPTING/REJECTING for rdma_accept/rdma_reject Bernard Metzler
2021-05-11 12:07 ` [PATCH 12/31] rdma/siw: add some debugging of state and sk_state to the teardown process Bernard Metzler
2021-05-11 12:15 ` [PATCH 15/31] rdma/siw: create a temporary copy of private data Bernard Metzler
2021-05-11 12:18 ` [PATCH 16/31] rdma/siw: use error and out logic at the end of siw_connect() Bernard Metzler
2021-05-11 12:20 ` [PATCH 17/31] rdma/siw: start mpa timer before calling siw_send_mpareqrep() Bernard Metzler
2021-05-11 12:25 ` [PATCH 19/31] rdma/siw: split out a __siw_cep_close() function Bernard Metzler
2021-05-11 12:31 ` Bernard Metzler [this message]
2021-05-11 12:42 ` [PATCH 22/31] rdma/siw: let siw_listen_address() call siw_cep_set_inuse() early Bernard Metzler
2021-05-11 12:43 ` [PATCH 23/31] rdma/siw: make use of __siw_cep_close() in siw_accept() Bernard Metzler
2021-05-11 12:47 ` [PATCH 24/31] rdma/siw: do the full disassociation of cep and qp in siw_qp_llp_close() Bernard Metzler
2021-05-11 12:58 ` [PATCH 25/31] rdma/siw: fix double siw_cep_put() in siw_cm_work_handler() Bernard Metzler
2021-05-11 13:02 ` [PATCH 27/31] rdma/siw: fix the "close" logic in siw_qp_cm_drop() Bernard Metzler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=OFBC50BE05.41477A2E-ON002586D2.0044C4E8-002586D2.0044C4EE@notes.na.collabserv.com \
    --to=bmt@zurich.ibm.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=metze@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.