All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@ziepe.ca>
To: "Håkon Bugge" <haakon.bugge@oracle.com>
Cc: Doug Ledford <dledford@redhat.com>,
	linux-rdma@vger.kernel.org, ted.h.kim@oracle.com,
	william.taylor@oracle.com
Subject: Re: [PATCH for-rc] RDMA/cm: Do not send REJ when remote_id is unknown
Date: Tue, 14 Apr 2020 17:40:15 -0300	[thread overview]
Message-ID: <20200414204015.GA28572@ziepe.ca> (raw)
In-Reply-To: <20200414111720.1789168-1-haakon.bugge@oracle.com>

On Tue, Apr 14, 2020 at 01:17:20PM +0200, Håkon Bugge wrote:
> In cm_destroy_id(), when the cm_id state is IB_CM_REQ_SENT or
> IB_CM_MRA_REQ_RCVD, an attempt is made to send a REJ with
> IB_CM_REJ_TIMEOUT as the reject reason.

Yes, this causes the remote to destroy the half completed connection,
for instance if the path is unidirectional.

> However, in said states, we have no remote_id. For the REQ_SENT case,
> we simply haven't received anything from our peer,

Which is correct, the spec covers this in Table 108 which describes
the remote communication ID as '0 if REJecting due to REP timeout and
no MRA was received'

This is implemented in cm_acquire_rejected_id(), assuming it isn't
buggy.

> for the MRA_REQ_RCVD case, the cm_rma_handler() doesn't pick up the
> remote_id.

This seems like a bug. It would be appropriate to store the remote id
when getting a MRA and set it in cm_format_rej() if a MRA has been rx'd

It also seems like a bug that cm_acquire_rejected_id() does not check
the remote_comm_id if it is not zero.

And for this reason it also seems unwise that cm_alloc_id_priv() will
allocate 0 cm_id's, as that value appears to have special meaning, oh
and it is unwise to use 0 with cm_acquire_id(). Tsk.

The CM_MSG_RESPONSE_REQ path looks kind of wrong too..

> Therefore, it is no reason to send this REJ, since it simply will be
> tossed at the peer's CM layer (if it reaches it). If running in CX-3
> virtualized and having the pr_debug enabled in the mlx4_ib driver, we
> will see:
> 
> mlx4_ib_demux_cm_handler: Couldn't find an entry for pv_cm_id 0x0

This seems to be a bug in mlx4. The pv layer should be duplicating how
cm_acquire_rejected_id() works

Something like this for the cm parts - what do you think?

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4794113ecd596c..fb384bf60b6f02 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -592,20 +592,35 @@ static void cm_free_id(__be32 local_id)
 	xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
 }
 
-static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
+/*
+ * If we know the message is related to a REQ then there is no remote_id set, so
+ * it should not be checked. The state should be in IB_CM_REQ_SENT,
+ * IB_CM_SIDR_REQ_SENT or IB_CM_MRA_REQ_RCVD and the caller should check this.
+ */
+static struct cm_id_private *cm_acquire_req(__be32 local_id)
 {
 	struct cm_id_private *cm_id_priv;
 
 	rcu_read_lock();
 	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
-	if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
-	    !refcount_inc_not_zero(&cm_id_priv->refcount))
+	if (!cm_id_priv || !refcount_inc_not_zero(&cm_id_priv->refcount))
 		cm_id_priv = NULL;
 	rcu_read_unlock();
 
 	return cm_id_priv;
 }
 
+static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
+{
+	struct cm_id_private *cm_id_priv = cm_acquire_req(local_id);
+
+	if (READ_ONCE(cm_id_priv->id.remote_id) != remote_id) {
+		cm_deref_id(cm_id_priv);
+		return NULL;
+	}
+	return cm_id_priv;
+}
+
 /*
  * Trivial helpers to strip endian annotation and compare; the
  * endianness doesn't actually matter since we just need a stable
@@ -1856,6 +1871,10 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg,
 			be32_to_cpu(cm_id_priv->id.local_id));
 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
 		break;
+	case IB_CM_MRA_REQ_RCVD:
+		IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+			be32_to_cpu(cm_id_priv->id.remote_id));
+		fallthrough;
 	default:
 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
 			be32_to_cpu(cm_id_priv->id.local_id));
@@ -2409,8 +2428,8 @@ static int cm_rep_handler(struct cm_work *work)
 	struct cm_timewait_info *timewait_info;
 
 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(
-		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
+	cm_id_priv = cm_acquire_req(
+		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)));
 	if (!cm_id_priv) {
 		cm_dup_rep_handler(work);
 		pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
@@ -2991,7 +3010,8 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 
 	remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
 
-	if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
+	if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT &&
+	    IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg) == 0) {
 		spin_lock_irq(&cm.lock);
 		timewait_info = cm_find_remote_id(
 			*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
@@ -3005,9 +3025,8 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 		spin_unlock_irq(&cm.lock);
 	} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
 		   CM_MSG_RESPONSE_REQ)
-		cm_id_priv = cm_acquire_id(
-			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
-			0);
+		cm_id_priv = cm_acquire_req(
+			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)));
 	else
 		cm_id_priv = cm_acquire_id(
 			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
@@ -3171,9 +3190,8 @@ static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
 {
 	switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
 	case CM_MSG_RESPONSE_REQ:
-		return cm_acquire_id(
-			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
-			0);
+		return cm_acquire_req(
+			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)));
 	case CM_MSG_RESPONSE_REP:
 	case CM_MSG_RESPONSE_OTHER:
 		return cm_acquire_id(
@@ -3211,6 +3229,8 @@ static int cm_mra_handler(struct cm_work *work)
 				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
+		cm_id_priv->id.remote_id =
+			IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg);
 		break;
 	case IB_CM_REP_SENT:
 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
@@ -3769,8 +3789,8 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 
 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
 				work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(
-		cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
+	cm_id_priv = cm_acquire_req(
+		cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)));
 	if (!cm_id_priv)
 		return -EINVAL; /* Unmatched reply. */
 

  reply	other threads:[~2020-04-14 20:40 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-14 11:17 [PATCH for-rc] RDMA/cm: Do not send REJ when remote_id is unknown Håkon Bugge
2020-04-14 20:40 ` Jason Gunthorpe [this message]
2020-04-16 13:24   ` Håkon Bugge
2020-04-16 18:23     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200414204015.GA28572@ziepe.ca \
    --to=jgg@ziepe.ca \
    --cc=dledford@redhat.com \
    --cc=haakon.bugge@oracle.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=ted.h.kim@oracle.com \
    --cc=william.taylor@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.