* [PATCH rdma-next v2 0/9] Fix memory corruption in CM
@ 2021-04-21 11:40 Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 1/9] IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg() Leon Romanovsky
                   ` (8 more replies)
  0 siblings, 9 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, linux-kernel, linux-rdma, Mark Zhang

From: Leon Romanovsky <leonro@nvidia.com>

Changelog:
v2:
 * Included Jason's patches in this series
v1: https://lore.kernel.org/linux-rdma/20210411122152.59274-1-leon@kernel.org
 * Squashed "remove mad_agent ..." patches to make sure that we don't
   need to check for the NULL argument.
v0: https://lore.kernel.org/lkml/20210318100309.670344-1-leon@kernel.org

-------------------------------------------------------------------------------

Hi,

This series from Mark fixes a long-standing bug in the CM migration
logic, reported by Ryan [1].

Thanks

[1] https://lore.kernel.org/linux-rdma/CAFMmRNx9cg--NUnZjFM8yWqFaEtsmAWV4EogKb3a0+hnjdtJFA@mail.gmail.com/

Jason Gunthorpe (4):
  IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg()
  IB/cm: Split cm_alloc_msg()
  IB/cm: Call the correct message free functions in cm_send_handler()
  IB/cm: Tidy remaining cm_msg free paths

Mark Zhang (5):
  Revert "IB/cm: Mark stale CM id's whenever the mad agent was
    unregistered"
  IB/cm: Simplify ib_cancel_mad() and ib_modify_mad() calls
  IB/cm: Clear all associated AV's ports when removing a cm device
  IB/cm: Add lock protection when accessing av/alt_av's port of a cm_id
  IB/cm: Initialize av before acquiring the spin lock in cm_lap_handler

 drivers/infiniband/core/cm.c       | 621 ++++++++++++++++-------------
 drivers/infiniband/core/mad.c      |  17 +-
 drivers/infiniband/core/sa_query.c |   4 +-
 include/rdma/ib_mad.h              |  27 +-
 4 files changed, 368 insertions(+), 301 deletions(-)

-- 
2.30.2



* [PATCH rdma-next v2 1/9] IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg()
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 2/9] IB/cm: Split cm_alloc_msg() Leon Romanovsky
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma, Mark Zhang

From: Jason Gunthorpe <jgg@nvidia.com>

This is not a functional change, but it helps make the purpose of all the
cm_free_msg() calls clearer. In this case a response msg has a NULL
context[0], and is never placed in cm_id_priv->msg.
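
For illustration only (a simplified sketch of the call sites the diff
touches, not new code), the intended pairing on the response path is:

        ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
        if (ret)
                return ret;
        ...
        if (ib_post_send_mad(msg, NULL))
                cm_free_response_msg(msg);      /* previously cm_free_msg() */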

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0ead0d223154..6e20ba5d32e1 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -413,7 +413,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
 
 	ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
 	if (ret) {
-		cm_free_msg(m);
+		ib_free_send_mad(m);
 		return ret;
 	}
 
@@ -421,6 +421,13 @@ static int cm_alloc_response_msg(struct cm_port *port,
 	return 0;
 }
 
+static void cm_free_response_msg(struct ib_mad_send_buf *msg)
+{
+	if (msg->ah)
+		rdma_destroy_ah(msg->ah, 0);
+	ib_free_send_mad(msg);
+}
+
 static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
 {
 	void *data;
@@ -1569,16 +1576,16 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
 	if (ret) {
+		cm_free_msg(cm_id_priv->msg);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		goto error2;
+		goto out;
 	}
 	BUG_ON(cm_id->state != IB_CM_IDLE);
 	cm_id->state = IB_CM_REQ_SENT;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return 0;
-
-error2:	cm_free_msg(cm_id_priv->msg);
-out:	return ret;
+out:
+	return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_req);
 
@@ -1618,7 +1625,7 @@ static int cm_issue_rej(struct cm_port *port,
 		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
-		cm_free_msg(msg);
+		cm_free_response_msg(msg);
 
 	return ret;
 }
@@ -1974,7 +1981,7 @@ static void cm_dup_req_handler(struct cm_work *work,
 	return;
 
 unlock:	spin_unlock_irq(&cm_id_priv->lock);
-free:	cm_free_msg(msg);
+free:	cm_free_response_msg(msg);
 }
 
 static struct cm_id_private *cm_match_req(struct cm_work *work,
@@ -2453,7 +2460,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
 	goto deref;
 
 unlock:	spin_unlock_irq(&cm_id_priv->lock);
-free:	cm_free_msg(msg);
+free:	cm_free_response_msg(msg);
 deref:	cm_deref_id(cm_id_priv);
 }
 
@@ -2794,7 +2801,7 @@ static int cm_issue_drep(struct cm_port *port,
 		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
-		cm_free_msg(msg);
+		cm_free_response_msg(msg);
 
 	return ret;
 }
@@ -2853,7 +2860,7 @@ static int cm_dreq_handler(struct cm_work *work)
 
 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
 		    ib_post_send_mad(msg, NULL))
-			cm_free_msg(msg);
+			cm_free_response_msg(msg);
 		goto deref;
 	case IB_CM_DREQ_RCVD:
 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
@@ -3343,7 +3350,7 @@ static int cm_lap_handler(struct cm_work *work)
 
 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
 		    ib_post_send_mad(msg, NULL))
-			cm_free_msg(msg);
+			cm_free_response_msg(msg);
 		goto deref;
 	case IB_CM_LAP_RCVD:
 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
-- 
2.30.2



* [PATCH rdma-next v2 2/9] IB/cm: Split cm_alloc_msg()
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 1/9] IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg() Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 3/9] IB/cm: Call the correct message free functions in cm_send_handler() Leon Romanovsky
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma, Mark Zhang

From: Jason Gunthorpe <jgg@nvidia.com>

This is being used with two quite different flows: one attaches the
message to the priv and the other does not.

Ensure the message is consistently attached under the spinlock, and that
the free-on-error path always detaches the message from the cm_id_priv,
also under the lock.

This makes reads and writes of cm_id_priv->msg consistently locked, and
ensures it is consistently NULL'd when the message is freed, even in all
error paths.
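
Roughly, the send paths end up with this shape (condensed from the diff
below; error unwinding elided):

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        msg = cm_alloc_priv_msg(cm_id_priv);    /* attaches to cm_id_priv->msg under the lock */
        if (!IS_ERR(msg)) {
                /* format the MAD ... */
                ret = ib_post_send_mad(msg, NULL);
                if (ret)
                        cm_free_priv_msg(msg); /* detaches and frees, still under the lock */
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);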

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 190 +++++++++++++++++++++--------------
 1 file changed, 115 insertions(+), 75 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 6e20ba5d32e1..94613275edcc 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -305,8 +305,7 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
 		complete(&cm_id_priv->comp);
 }
 
-static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
-			struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 {
 	struct ib_mad_agent *mad_agent;
 	struct ib_mad_send_buf *m;
@@ -359,12 +358,42 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	m->retries = cm_id_priv->max_cm_retries;
 
 	refcount_inc(&cm_id_priv->refcount);
+	spin_unlock_irqrestore(&cm.state_lock, flags2);
 	m->context[0] = cm_id_priv;
-	*msg = m;
+	return m;
 
 out:
 	spin_unlock_irqrestore(&cm.state_lock, flags2);
-	return ret;
+	return ERR_PTR(ret);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
+{
+	struct ib_mad_send_buf *msg;
+
+	lockdep_assert_held(&cm_id_priv->lock);
+
+	msg = cm_alloc_msg(cm_id_priv);
+	if (IS_ERR(msg))
+		return msg;
+	cm_id_priv->msg = msg;
+	return msg;
+}
+
+static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
+{
+	struct cm_id_private *cm_id_priv = msg->context[0];
+
+	lockdep_assert_held(&cm_id_priv->lock);
+
+	if (!WARN_ON(cm_id_priv->msg != msg))
+		cm_id_priv->msg = NULL;
+
+	if (msg->ah)
+		rdma_destroy_ah(msg->ah, 0);
+	cm_deref_id(cm_id_priv);
+	ib_free_send_mad(msg);
 }
 
 static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
@@ -1508,6 +1537,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 		   struct ib_cm_req_param *param)
 {
 	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
 	struct cm_req_msg *req_msg;
 	unsigned long flags;
 	int ret;
@@ -1559,31 +1589,34 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 	cm_id_priv->pkey = param->primary_path->pkey;
 	cm_id_priv->qp_type = param->qp_type;
 
-	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
-	if (ret)
-		goto out;
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	msg = cm_alloc_priv_msg(cm_id_priv);
+	if (IS_ERR(msg)) {
+		ret = PTR_ERR(msg);
+		goto out_unlock;
+	}
 
-	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+	req_msg = (struct cm_req_msg *)msg->mad;
 	cm_format_req(req_msg, cm_id_priv, param);
 	cm_id_priv->tid = req_msg->hdr.tid;
-	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
-	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
 
 	cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
 
 	trace_icm_send_req(&cm_id_priv->id);
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
-	if (ret) {
-		cm_free_msg(cm_id_priv->msg);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		goto out;
-	}
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		goto out_free;
 	BUG_ON(cm_id->state != IB_CM_IDLE);
 	cm_id->state = IB_CM_REQ_SENT;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return 0;
+out_free:
+	cm_free_priv_msg(msg);
+out_unlock:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 out:
 	return ret;
 }
@@ -2290,9 +2323,11 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
+	msg = cm_alloc_priv_msg(cm_id_priv);
+	if (IS_ERR(msg)) {
+		ret = PTR_ERR(msg);
 		goto out;
+	}
 
 	rep_msg = (struct cm_rep_msg *) msg->mad;
 	cm_format_rep(rep_msg, cm_id_priv, param);
@@ -2301,14 +2336,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
 
 	trace_icm_send_rep(cm_id);
 	ret = ib_post_send_mad(msg, NULL);
-	if (ret) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		cm_free_msg(msg);
-		return ret;
-	}
+	if (ret)
+		goto out_free;
 
 	cm_id->state = IB_CM_REP_SENT;
-	cm_id_priv->msg = msg;
 	cm_id_priv->initiator_depth = param->initiator_depth;
 	cm_id_priv->responder_resources = param->responder_resources;
 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
@@ -2316,8 +2347,13 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
 		  "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
 		  param->qp_num);
 	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return 0;
 
-out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out_free:
+	cm_free_priv_msg(msg);
+out:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_rep);
@@ -2364,9 +2400,11 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
 		goto error;
 	}
 
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
+	msg = cm_alloc_msg(cm_id_priv);
+	if (IS_ERR(msg)) {
+		ret = PTR_ERR(msg);
 		goto error;
+	}
 
 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
 		      private_data, private_data_len);
@@ -2664,10 +2702,10 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
 	    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret) {
+	msg = cm_alloc_priv_msg(cm_id_priv);
+	if (IS_ERR(msg)) {
 		cm_enter_timewait(cm_id_priv);
-		return ret;
+		return PTR_ERR(msg);
 	}
 
 	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
@@ -2679,12 +2717,11 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_enter_timewait(cm_id_priv);
-		cm_free_msg(msg);
+		cm_free_priv_msg(msg);
 		return ret;
 	}
 
 	cm_id_priv->id.state = IB_CM_DREQ_SENT;
-	cm_id_priv->msg = msg;
 	return 0;
 }
 
@@ -2739,9 +2776,9 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
 	cm_set_private_data(cm_id_priv, private_data, private_data_len);
 	cm_enter_timewait(cm_id_priv);
 
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
-		return ret;
+	msg = cm_alloc_msg(cm_id_priv);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
 		       private_data, private_data_len);
@@ -2934,9 +2971,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
 		cm_reset_to_idle(cm_id_priv);
-		ret = cm_alloc_msg(cm_id_priv, &msg);
-		if (ret)
-			return ret;
+		msg = cm_alloc_msg(cm_id_priv);
+		if (IS_ERR(msg))
+			return PTR_ERR(msg);
 		cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
 			      ari, ari_length, private_data, private_data_len,
 			      state);
@@ -2944,9 +2981,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
 		cm_enter_timewait(cm_id_priv);
-		ret = cm_alloc_msg(cm_id_priv, &msg);
-		if (ret)
-			return ret;
+		msg = cm_alloc_msg(cm_id_priv);
+		if (IS_ERR(msg))
+			return PTR_ERR(msg);
 		cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
 			      ari, ari_length, private_data, private_data_len,
 			      state);
@@ -3124,13 +3161,15 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 	default:
 		trace_icm_send_mra_unknown_err(&cm_id_priv->id);
 		ret = -EINVAL;
-		goto error1;
+		goto error_unlock;
 	}
 
 	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
-		ret = cm_alloc_msg(cm_id_priv, &msg);
-		if (ret)
-			goto error1;
+		msg = cm_alloc_msg(cm_id_priv);
+		if (IS_ERR(msg)) {
+			ret = PTR_ERR(msg);
+			goto error_unlock;
+		}
 
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      msg_response, service_timeout,
@@ -3138,7 +3177,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 		trace_icm_send_mra(cm_id);
 		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
-			goto error2;
+			goto error_free_msg;
 	}
 
 	cm_id->state = cm_state;
@@ -3148,13 +3187,11 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return 0;
 
-error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-	kfree(data);
-	return ret;
-
-error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-	kfree(data);
+error_free_msg:
 	cm_free_msg(msg);
+error_unlock:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	kfree(data);
 	return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_mra);
@@ -3490,38 +3527,41 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 				 &cm_id_priv->av,
 				 cm_id_priv);
 	if (ret)
-		goto out;
+		return ret;
 
 	cm_id->service_id = param->service_id;
 	cm_id->service_mask = ~cpu_to_be64(0);
 	cm_id_priv->timeout_ms = param->timeout_ms;
 	cm_id_priv->max_cm_retries = param->max_cm_retries;
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
-		goto out;
-
-	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
-			   param);
-	msg->timeout_ms = cm_id_priv->timeout_ms;
-	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	if (cm_id->state == IB_CM_IDLE) {
-		trace_icm_send_sidr_req(&cm_id_priv->id);
-		ret = ib_post_send_mad(msg, NULL);
-	} else {
+	if (cm_id->state != IB_CM_IDLE) {
 		ret = -EINVAL;
+		goto out_unlock;
 	}
 
-	if (ret) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		cm_free_msg(msg);
-		goto out;
+	msg = cm_alloc_priv_msg(cm_id_priv);
+	if (IS_ERR(msg)) {
+		ret = PTR_ERR(msg);
+		goto out_unlock;
 	}
+
+	cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
+			   param);
+	msg->timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
+
+	trace_icm_send_sidr_req(&cm_id_priv->id);
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		goto out_free;
 	cm_id->state = IB_CM_SIDR_REQ_SENT;
-	cm_id_priv->msg = msg;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-out:
+	return 0;
+out_free:
+	cm_free_priv_msg(msg);
+out_unlock:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_sidr_req);
@@ -3668,9 +3708,9 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
 	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
 		return -EINVAL;
 
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
-		return ret;
+	msg = cm_alloc_msg(cm_id_priv);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
 			   param);
-- 
2.30.2



* [PATCH rdma-next v2 3/9] IB/cm: Call the correct message free functions in cm_send_handler()
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 1/9] IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg() Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 2/9] IB/cm: Split cm_alloc_msg() Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 4/9] IB/cm: Tidy remaining cm_msg free paths Leon Romanovsky
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@nvidia.com>

There are now three destroy functions for the cm_msg, and all places
except the general send completion handler use the correct function.

Fix cm_send_handler() to detect which kind of message is being completed
and destroy it using the correct function with the correct locking.
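
For reference, the alloc/free pairings used across this series are:

        cm_alloc_msg()          -> cm_free_msg()
        cm_alloc_priv_msg()     -> cm_free_priv_msg()
        cm_alloc_response_msg() -> cm_free_response_msg()

cm_send_handler() dispatches on msg->context[0]: messages with a cm_id_priv
go through cm_process_send_error(), which frees with cm_free_priv_msg()
when the message is the one stored in cm_id_priv->msg and cm_free_msg()
otherwise; messages without a context are response messages and are freed
with cm_free_response_msg().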

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 52 +++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 94613275edcc..8dbc39ea4612 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3795,22 +3795,26 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 	return -EINVAL;
 }
 
-static void cm_process_send_error(struct ib_mad_send_buf *msg,
+static void cm_process_send_error(struct cm_id_private *cm_id_priv,
+				  struct ib_mad_send_buf *msg,
+				  enum ib_cm_state state,
 				  enum ib_wc_status wc_status)
 {
-	struct cm_id_private *cm_id_priv;
-	struct ib_cm_event cm_event;
-	enum ib_cm_state state;
+	struct ib_cm_event cm_event = {};
 	int ret;
 
-	memset(&cm_event, 0, sizeof cm_event);
-	cm_id_priv = msg->context[0];
-
 	/* Discard old sends or ones without a response. */
 	spin_lock_irq(&cm_id_priv->lock);
-	state = (enum ib_cm_state) (unsigned long) msg->context[1];
-	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
-		goto discard;
+	if (msg != cm_id_priv->msg) {
+		spin_unlock_irq(&cm_id_priv->lock);
+		cm_free_msg(msg);
+		return;
+	}
+	cm_free_priv_msg(msg);
+
+	if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS ||
+	    wc_status == IB_WC_WR_FLUSH_ERR)
+		goto out_unlock;
 
 	trace_icm_mad_send_err(state, wc_status);
 	switch (state) {
@@ -3833,26 +3837,27 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
 		cm_event.event = IB_CM_SIDR_REQ_ERROR;
 		break;
 	default:
-		goto discard;
+		goto out_unlock;
 	}
 	spin_unlock_irq(&cm_id_priv->lock);
 	cm_event.param.send_status = wc_status;
 
 	/* No other events can occur on the cm_id at this point. */
 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
-	cm_free_msg(msg);
 	if (ret)
 		ib_destroy_cm_id(&cm_id_priv->id);
 	return;
-discard:
+out_unlock:
 	spin_unlock_irq(&cm_id_priv->lock);
-	cm_free_msg(msg);
 }
 
 static void cm_send_handler(struct ib_mad_agent *mad_agent,
 			    struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+	struct cm_id_private *cm_id_priv = msg->context[0];
+	enum ib_cm_state state =
+		(enum ib_cm_state)(unsigned long)msg->context[1];
 	struct cm_port *port;
 	u16 attr_index;
 
@@ -3865,7 +3870,7 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
 	 * set to a cm_id), and is not a REJ, then it is a send that was
 	 * manually retried.
 	 */
-	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+	if (!cm_id_priv && (attr_index != CM_REJ_COUNTER))
 		msg->retries = 1;
 
 	atomic_long_add(1 + msg->retries,
@@ -3875,18 +3880,11 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
 				&port->counter_group[CM_XMIT_RETRIES].
 				counter[attr_index]);
 
-	switch (mad_send_wc->status) {
-	case IB_WC_SUCCESS:
-	case IB_WC_WR_FLUSH_ERR:
-		cm_free_msg(msg);
-		break;
-	default:
-		if (msg->context[0] && msg->context[1])
-			cm_process_send_error(msg, mad_send_wc->status);
-		else
-			cm_free_msg(msg);
-		break;
-	}
+	if (cm_id_priv)
+		cm_process_send_error(cm_id_priv, msg, state,
+				      mad_send_wc->status);
+	else
+		cm_free_response_msg(msg);
 }
 
 static void cm_work_handler(struct work_struct *_work)
-- 
2.30.2



* [PATCH rdma-next v2 4/9] IB/cm: Tidy remaining cm_msg free paths
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
                   ` (2 preceding siblings ...)
  2021-04-21 11:40 ` [PATCH rdma-next v2 3/9] IB/cm: Call the correct message free functions in cm_send_handler() Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 5/9] Revert "IB/cm: Mark stale CM id's whenever the mad agent was unregistered" Leon Romanovsky
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@nvidia.com>

Now that all the free paths are explicit, cm_free_msg() will only be
called for msgs allocated with cm_alloc_msg(), so we can assume the
context is set. Place it after the allocation function it is paired with
for clarity.

Also remove a bogus NULL assignment in one place after a cancel. It does
nothing other than prevent the send completion from being turned into an
event, and changing the state already did that.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 8dbc39ea4612..1f0bc31ca0e2 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -367,6 +367,16 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	return ERR_PTR(ret);
 }
 
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+	struct cm_id_private *cm_id_priv = msg->context[0];
+
+	if (msg->ah)
+		rdma_destroy_ah(msg->ah, 0);
+	cm_deref_id(cm_id_priv);
+	ib_free_send_mad(msg);
+}
+
 static struct ib_mad_send_buf *
 cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
 {
@@ -420,15 +430,6 @@ static int cm_create_response_msg_ah(struct cm_port *port,
 	return 0;
 }
 
-static void cm_free_msg(struct ib_mad_send_buf *msg)
-{
-	if (msg->ah)
-		rdma_destroy_ah(msg->ah, 0);
-	if (msg->context[0])
-		cm_deref_id(msg->context[0]);
-	ib_free_send_mad(msg);
-}
-
 static int cm_alloc_response_msg(struct cm_port *port,
 				 struct ib_mad_recv_wc *mad_recv_wc,
 				 struct ib_mad_send_buf **msg)
@@ -3455,7 +3456,6 @@ static int cm_apr_handler(struct cm_work *work)
 	}
 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-	cm_id_priv->msg = NULL;
 	cm_queue_work_unlock(cm_id_priv, work);
 	return 0;
 out:
-- 
2.30.2



* [PATCH rdma-next v2 5/9] Revert "IB/cm: Mark stale CM id's whenever the mad agent was unregistered"
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
                   ` (3 preceding siblings ...)
  2021-04-21 11:40 ` [PATCH rdma-next v2 4/9] IB/cm: Tidy remaining cm_msg free paths Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 6/9] IB/cm: Simplify ib_cancel_mad() and ib_modify_mad() calls Leon Romanovsky
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma

From: Mark Zhang <markzhang@nvidia.com>

This reverts commit 9db0ff53cb9b43ed75bacd42a89c1a0ab048b2b0, which was
not a complete fix; the following panic can still occur:

panic @ time 1605623870.843, thread 0xfffffeb63b552000: vm_fault_lookup: fault on nofault entry, addr: 0xfffffe811a94e000
    time = 1605623870
    cpuid = 9, TSC = 0xb7937acc1b6
    Panic occurred in module kernel loaded at 0xffffffff80200000:Stack: --------------------------------------------------
    kernel:vm_fault+0x19da
    kernel:vm_fault_trap+0x6e
    kernel:trap_pfault+0x1f1
    kernel:trap+0x31e
    kernel:cm_destroy_id+0x38c
    kernel:rdma_destroy_id+0x127
    kernel:sdp_shutdown_task+0x3ae
    kernel:taskqueue_run_locked+0x10b
    kernel:taskqueue_thread_loop+0x87
    kernel:fork_exit+0x83

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 123 ++++-------------------------------
 1 file changed, 14 insertions(+), 109 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1f0bc31ca0e2..8a7ac605fded 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -121,8 +121,6 @@ static struct ib_cm {
 	__be32 random_id_operand;
 	struct list_head timewait_list;
 	struct workqueue_struct *wq;
-	/* Sync on cm change port state */
-	spinlock_t state_lock;
 } cm;
 
 /* Counter indexes ordered by attribute ID */
@@ -203,8 +201,6 @@ struct cm_port {
 	struct cm_device *cm_dev;
 	struct ib_mad_agent *mad_agent;
 	u32 port_num;
-	struct list_head cm_priv_prim_list;
-	struct list_head cm_priv_altr_list;
 	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
 };
 
@@ -285,12 +281,6 @@ struct cm_id_private {
 	u8 service_timeout;
 	u8 target_ack_delay;
 
-	struct list_head prim_list;
-	struct list_head altr_list;
-	/* Indicates that the send port mad is registered and av is set */
-	int prim_send_port_not_ready;
-	int altr_send_port_not_ready;
-
 	struct list_head work_list;
 	atomic_t work_count;
 
@@ -310,47 +300,20 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	struct ib_mad_agent *mad_agent;
 	struct ib_mad_send_buf *m;
 	struct ib_ah *ah;
-	struct cm_av *av;
-	unsigned long flags, flags2;
-	int ret = 0;
 
-	/* don't let the port to be released till the agent is down */
-	spin_lock_irqsave(&cm.state_lock, flags2);
-	spin_lock_irqsave(&cm.lock, flags);
-	if (!cm_id_priv->prim_send_port_not_ready)
-		av = &cm_id_priv->av;
-	else if (!cm_id_priv->altr_send_port_not_ready &&
-		 (cm_id_priv->alt_av.port))
-		av = &cm_id_priv->alt_av;
-	else {
-		pr_info("%s: not valid CM id\n", __func__);
-		ret = -ENODEV;
-		spin_unlock_irqrestore(&cm.lock, flags);
-		goto out;
-	}
-	spin_unlock_irqrestore(&cm.lock, flags);
-	/* Make sure the port haven't released the mad yet */
 	mad_agent = cm_id_priv->av.port->mad_agent;
-	if (!mad_agent) {
-		pr_info("%s: not a valid MAD agent\n", __func__);
-		ret = -ENODEV;
-		goto out;
-	}
-	ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
-	if (IS_ERR(ah)) {
-		ret = PTR_ERR(ah);
-		goto out;
-	}
+	ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
+	if (IS_ERR(ah))
+		return (void *)ah;
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
-			       av->pkey_index,
+			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC,
 			       IB_MGMT_BASE_VERSION);
 	if (IS_ERR(m)) {
 		rdma_destroy_ah(ah, 0);
-		ret = PTR_ERR(m);
-		goto out;
+		return m;
 	}
 
 	/* Timeout set by caller if response is expected. */
@@ -358,13 +321,8 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	m->retries = cm_id_priv->max_cm_retries;
 
 	refcount_inc(&cm_id_priv->refcount);
-	spin_unlock_irqrestore(&cm.state_lock, flags2);
 	m->context[0] = cm_id_priv;
 	return m;
-
-out:
-	spin_unlock_irqrestore(&cm.state_lock, flags2);
-	return ERR_PTR(ret);
 }
 
 static void cm_free_msg(struct ib_mad_send_buf *msg)
@@ -518,21 +476,6 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
 				       grh, &av->ah_attr);
 }
 
-static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
-				   struct cm_av *av, struct cm_port *port)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cm.lock, flags);
-	if (&cm_id_priv->av == av)
-		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
-	else if (&cm_id_priv->alt_av == av)
-		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
-	else
-		WARN_ON(true);
-	spin_unlock_irqrestore(&cm.lock, flags);
-}
-
 static struct cm_port *
 get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
 {
@@ -576,8 +519,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
 
 static int cm_init_av_by_path(struct sa_path_rec *path,
 			      const struct ib_gid_attr *sgid_attr,
-			      struct cm_av *av,
-			      struct cm_id_private *cm_id_priv)
+			      struct cm_av *av)
 {
 	struct rdma_ah_attr new_ah_attr;
 	struct cm_device *cm_dev;
@@ -611,7 +553,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 		return ret;
 
 	av->timeout = path->packet_life_time + 1;
-	add_cm_id_to_port_list(cm_id_priv, av, port);
 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
@@ -891,8 +832,6 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
 	spin_lock_init(&cm_id_priv->lock);
 	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
-	INIT_LIST_HEAD(&cm_id_priv->prim_list);
-	INIT_LIST_HEAD(&cm_id_priv->altr_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	refcount_set(&cm_id_priv->refcount, 1);
 
@@ -1193,12 +1132,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		kfree(cm_id_priv->timewait_info);
 		cm_id_priv->timewait_info = NULL;
 	}
-	if (!list_empty(&cm_id_priv->altr_list) &&
-	    (!cm_id_priv->altr_send_port_not_ready))
-		list_del(&cm_id_priv->altr_list);
-	if (!list_empty(&cm_id_priv->prim_list) &&
-	    (!cm_id_priv->prim_send_port_not_ready))
-		list_del(&cm_id_priv->prim_list);
+
 	WARN_ON(cm_id_priv->listen_sharecount);
 	WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
 	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
@@ -1566,13 +1500,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 	}
 
 	ret = cm_init_av_by_path(param->primary_path,
-				 param->ppath_sgid_attr, &cm_id_priv->av,
-				 cm_id_priv);
+				 param->ppath_sgid_attr, &cm_id_priv->av);
 	if (ret)
 		goto out;
 	if (param->alternate_path) {
 		ret = cm_init_av_by_path(param->alternate_path, NULL,
-					 &cm_id_priv->alt_av, cm_id_priv);
+					 &cm_id_priv->alt_av);
 		if (ret)
 			goto out;
 	}
@@ -2204,8 +2137,7 @@ static int cm_req_handler(struct cm_work *work)
 		sa_path_set_dmac(&work->path[0],
 				 cm_id_priv->av.ah_attr.roce.dmac);
 	work->path[0].hop_limit = grh->hop_limit;
-	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
-				 cm_id_priv);
+	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
 	if (ret) {
 		int err;
 
@@ -2224,7 +2156,7 @@ static int cm_req_handler(struct cm_work *work)
 	}
 	if (cm_req_has_alt_path(req_msg)) {
 		ret = cm_init_av_by_path(&work->path[1], NULL,
-					 &cm_id_priv->alt_av, cm_id_priv);
+					 &cm_id_priv->alt_av);
 		if (ret) {
 			ib_send_cm_rej(&cm_id_priv->id,
 				       IB_CM_REJ_INVALID_ALT_GID,
@@ -3405,7 +3337,7 @@ static int cm_lap_handler(struct cm_work *work)
 		goto unlock;
 
 	ret = cm_init_av_by_path(param->alternate_path, NULL,
-				 &cm_id_priv->alt_av, cm_id_priv);
+				 &cm_id_priv->alt_av);
 	if (ret)
 		goto unlock;
 
@@ -3524,8 +3456,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 	ret = cm_init_av_by_path(param->path, param->sgid_attr,
-				 &cm_id_priv->av,
-				 cm_id_priv);
+				 &cm_id_priv->av);
 	if (ret)
 		return ret;
 
@@ -4008,9 +3939,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
 static int cm_migrate(struct ib_cm_id *cm_id)
 {
 	struct cm_id_private *cm_id_priv;
-	struct cm_av tmp_av;
 	unsigned long flags;
-	int tmp_send_port_not_ready;
 	int ret = 0;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -4019,14 +3948,7 @@ static int cm_migrate(struct ib_cm_id *cm_id)
 	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
 	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
 		cm_id->lap_state = IB_CM_LAP_IDLE;
-		/* Swap address vector */
-		tmp_av = cm_id_priv->av;
 		cm_id_priv->av = cm_id_priv->alt_av;
-		cm_id_priv->alt_av = tmp_av;
-		/* Swap port send ready state */
-		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
-		cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
-		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
 	} else
 		ret = -EINVAL;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -4401,9 +4323,6 @@ static int cm_add_one(struct ib_device *ib_device)
 		port->cm_dev = cm_dev;
 		port->port_num = i;
 
-		INIT_LIST_HEAD(&port->cm_priv_prim_list);
-		INIT_LIST_HEAD(&port->cm_priv_altr_list);
-
 		ret = cm_create_port_fs(port);
 		if (ret)
 			goto error1;
@@ -4467,8 +4386,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct cm_device *cm_dev = client_data;
 	struct cm_port *port;
-	struct cm_id_private *cm_id_priv;
-	struct ib_mad_agent *cur_mad_agent;
 	struct ib_port_modify port_modify = {
 		.clr_port_cap_mask = IB_PORT_CM_SUP
 	};
@@ -4489,24 +4406,13 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 
 		port = cm_dev->port[i-1];
 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
-		/* Mark all the cm_id's as not valid */
-		spin_lock_irq(&cm.lock);
-		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
-			cm_id_priv->altr_send_port_not_ready = 1;
-		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
-			cm_id_priv->prim_send_port_not_ready = 1;
-		spin_unlock_irq(&cm.lock);
 		/*
 		 * We flush the queue here after the going_down set, this
 		 * verify that no new works will be queued in the recv handler,
 		 * after that we can call the unregister_mad_agent
 		 */
 		flush_workqueue(cm.wq);
-		spin_lock_irq(&cm.state_lock);
-		cur_mad_agent = port->mad_agent;
-		port->mad_agent = NULL;
-		spin_unlock_irq(&cm.state_lock);
-		ib_unregister_mad_agent(cur_mad_agent);
+		ib_unregister_mad_agent(port->mad_agent);
 		cm_remove_port_fs(port);
 		kfree(port);
 	}
@@ -4521,7 +4427,6 @@ static int __init ib_cm_init(void)
 	INIT_LIST_HEAD(&cm.device_list);
 	rwlock_init(&cm.device_lock);
 	spin_lock_init(&cm.lock);
-	spin_lock_init(&cm.state_lock);
 	cm.listen_service_table = RB_ROOT;
 	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
 	cm.remote_id_table = RB_ROOT;
-- 
2.30.2



* [PATCH rdma-next v2 6/9] IB/cm: Simplify ib_cancel_mad() and ib_modify_mad() calls
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
                   ` (4 preceding siblings ...)
  2021-04-21 11:40 ` [PATCH rdma-next v2 5/9] Revert "IB/cm: Mark stale CM id's whenever the mad agent was unregistered" Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when removing a cm device Leon Romanovsky
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma

From: Mark Zhang <markzhang@nvidia.com>

The mad_agent parameter is redundant since struct ib_mad_send_buf already
holds a pointer to it.
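
Callers shrink accordingly, e.g. (sketch of the new calling convention):

        ib_cancel_mad(cm_id_priv->msg);
        ib_modify_mad(cm_id_priv->msg, timeout);

ib_cancel_mad() becomes a static inline wrapper around
ib_modify_mad(send_buf, 0), and ib_modify_mad() now tolerates a NULL
send_buf by returning -EINVAL.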

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c       | 42 ++++++++++++++----------------
 drivers/infiniband/core/mad.c      | 17 +++++-------
 drivers/infiniband/core/sa_query.c |  4 +--
 include/rdma/ib_mad.h              | 27 +++++++++----------
 4 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 8a7ac605fded..2d62c90f9790 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1058,7 +1058,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		break;
 	case IB_CM_SIDR_REQ_SENT:
 		cm_id->state = IB_CM_IDLE;
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		break;
 	case IB_CM_SIDR_REQ_RCVD:
 		cm_send_sidr_rep_locked(cm_id_priv,
@@ -1069,7 +1069,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		break;
 	case IB_CM_REQ_SENT:
 	case IB_CM_MRA_REQ_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
 				   &cm_id_priv->id.device->node_guid,
 				   sizeof(cm_id_priv->id.device->node_guid),
@@ -1087,7 +1087,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		break;
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
 				   0, NULL, 0);
 		goto retest;
@@ -1105,7 +1105,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		cm_send_dreq_locked(cm_id_priv, NULL, 0);
 		goto retest;
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		cm_enter_timewait(cm_id_priv);
 		goto retest;
 	case IB_CM_DREQ_RCVD:
@@ -2531,7 +2531,7 @@ static int cm_rep_handler(struct cm_work *work)
 			cm_ack_timeout(cm_id_priv->target_ack_delay,
 				       cm_id_priv->alt_av.timeout - 1);
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->msg);
 	cm_queue_work_unlock(cm_id_priv, work);
 	return 0;
 
@@ -2555,7 +2555,7 @@ static int cm_establish_handler(struct cm_work *work)
 		goto out;
 	}
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->msg);
 	cm_queue_work_unlock(cm_id_priv, work);
 	return 0;
 out:
@@ -2588,7 +2588,7 @@ static int cm_rtu_handler(struct cm_work *work)
 	}
 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->msg);
 	cm_queue_work_unlock(cm_id_priv, work);
 	return 0;
 out:
@@ -2633,7 +2633,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
 
 	if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
 	    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 
 	msg = cm_alloc_priv_msg(cm_id_priv);
 	if (IS_ERR(msg)) {
@@ -2807,12 +2807,12 @@ static int cm_dreq_handler(struct cm_work *work)
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REP_SENT:
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		break;
 	case IB_CM_ESTABLISHED:
 		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
 		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
-			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+			ib_cancel_mad(cm_id_priv->msg);
 		break;
 	case IB_CM_MRA_REP_RCVD:
 		break;
@@ -2873,7 +2873,7 @@ static int cm_drep_handler(struct cm_work *work)
 	}
 	cm_enter_timewait(cm_id_priv);
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->msg);
 	cm_queue_work_unlock(cm_id_priv, work);
 	return 0;
 out:
@@ -3009,7 +3009,7 @@ static int cm_rej_handler(struct cm_work *work)
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		fallthrough;
 	case IB_CM_REQ_RCVD:
 	case IB_CM_MRA_REQ_SENT:
@@ -3019,7 +3019,7 @@ static int cm_rej_handler(struct cm_work *work)
 			cm_reset_to_idle(cm_id_priv);
 		break;
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->msg);
 		fallthrough;
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
@@ -3029,8 +3029,7 @@ static int cm_rej_handler(struct cm_work *work)
 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
 		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
 			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
-				ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-					      cm_id_priv->msg);
+				ib_cancel_mad(cm_id_priv->msg);
 			cm_enter_timewait(cm_id_priv);
 			break;
 		}
@@ -3169,16 +3168,14 @@ static int cm_mra_handler(struct cm_work *work)
 	case IB_CM_REQ_SENT:
 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
 			    CM_MSG_RESPONSE_REQ ||
-		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  cm_id_priv->msg, timeout))
+		    ib_modify_mad(cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
 		break;
 	case IB_CM_REP_SENT:
 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
 			    CM_MSG_RESPONSE_REP ||
-		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  cm_id_priv->msg, timeout))
+		    ib_modify_mad(cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
 		break;
@@ -3186,8 +3183,7 @@ static int cm_mra_handler(struct cm_work *work)
 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
 			    CM_MSG_RESPONSE_OTHER ||
 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
-		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  cm_id_priv->msg, timeout)) {
+		    ib_modify_mad(cm_id_priv->msg, timeout)) {
 			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
 				atomic_long_inc(&work->port->
 						counter_group[CM_RECV_DUPLICATES].
@@ -3387,7 +3383,7 @@ static int cm_apr_handler(struct cm_work *work)
 		goto out;
 	}
 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->msg);
 	cm_queue_work_unlock(cm_id_priv, work);
 	return 0;
 out:
@@ -3715,7 +3711,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 		goto out;
 	}
 	cm_id_priv->id.state = IB_CM_IDLE;
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->msg);
 	spin_unlock_irq(&cm_id_priv->lock);
 
 	cm_format_sidr_rep_event(work, cm_id_priv);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2081e4854fb0..df6226f45047 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2459,16 +2459,18 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
 	return NULL;
 }
 
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
-		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
 	unsigned long flags;
 	int active;
 
-	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
-				      agent);
+	if (!send_buf)
+		return -EINVAL;
+
+	mad_agent_priv = container_of(send_buf->mad_agent,
+				      struct ib_mad_agent_private, agent);
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
@@ -2493,13 +2495,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
 }
 EXPORT_SYMBOL(ib_modify_mad);
 
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
-		   struct ib_mad_send_buf *send_buf)
-{
-	ib_modify_mad(mad_agent, send_buf, 0);
-}
-EXPORT_SYMBOL(ib_cancel_mad);
-
 static void local_completions(struct work_struct *work)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8f1705c403b4..9a4a49c37922 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1172,7 +1172,6 @@ EXPORT_SYMBOL(ib_sa_unregister_client);
 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
 {
 	unsigned long flags;
-	struct ib_mad_agent *agent;
 	struct ib_mad_send_buf *mad_buf;
 
 	xa_lock_irqsave(&queries, flags);
@@ -1180,7 +1179,6 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
 		xa_unlock_irqrestore(&queries, flags);
 		return;
 	}
-	agent = query->port->agent;
 	mad_buf = query->mad_buf;
 	xa_unlock_irqrestore(&queries, flags);
 
@@ -1190,7 +1188,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
 	 * sent to the MAD layer and has to be cancelled from there.
 	 */
 	if (!ib_nl_cancel_request(query))
-		ib_cancel_mad(agent, mad_buf);
+		ib_cancel_mad(mad_buf);
 }
 EXPORT_SYMBOL(ib_sa_cancel_query);
 
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index f1d34f06a68b..465b0d0bdaf8 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -717,28 +717,27 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
  */
 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
 
-/**
- * ib_cancel_mad - Cancels an outstanding send MAD operation.
- * @mad_agent: Specifies the registration associated with sent MAD.
- * @send_buf: Indicates the MAD to cancel.
- *
- * MADs will be returned to the user through the corresponding
- * ib_mad_send_handler.
- */
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
-		   struct ib_mad_send_buf *send_buf);
-
 /**
  * ib_modify_mad - Modifies an outstanding send MAD operation.
- * @mad_agent: Specifies the registration associated with sent MAD.
  * @send_buf: Indicates the MAD to modify.
  * @timeout_ms: New timeout value for sent MAD.
  *
  * This call will reset the timeout value for a sent MAD to the specified
  * value.
  */
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
-		  struct ib_mad_send_buf *send_buf, u32 timeout_ms);
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms);
+
+/**
+ * ib_cancel_mad - Cancels an outstanding send MAD operation.
+ * @send_buf: Indicates the MAD to cancel.
+ *
+ * MADs will be returned to the user through the corresponding
+ * ib_mad_send_handler.
+ */
+static inline void ib_cancel_mad(struct ib_mad_send_buf *send_buf)
+{
+	ib_modify_mad(send_buf, 0);
+}
 
 /**
  * ib_create_send_mad - Allocate and initialize a data buffer and work request
-- 
2.30.2



* [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when removing a cm device
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
                   ` (5 preceding siblings ...)
  2021-04-21 11:40 ` [PATCH rdma-next v2 6/9] IB/cm: Simplify ib_cancel_mad() and ib_modify_mad() calls Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-22 19:34   ` Jason Gunthorpe
  2021-04-21 11:40 ` [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when accessing av/alt_av's port of a cm_id Leon Romanovsky
  2021-04-21 11:40 ` [PATCH rdma-next v2 9/9] IB/cm: Initialize av before acquiring the spin lock in cm_lap_handler Leon Romanovsky
  8 siblings, 1 reply; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma

From: Mark Zhang <markzhang@nvidia.com>

When a cm device is removed, all of its ports are removed as well, so all
associated AVs' port pointers need to be cleared.
This patch adds a cm_id_priv list to each cm_device; a cm_id is added to
this list when its primary AV is initialized. When the device is removed,
every cm_id on this list is removed from the list and has its av->port
and alt_av->port pointers cleared.
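
On device removal the new list is then walked to detach every cm_id from
the dying device (this is the core of the cm_remove_one() hunk below):

        list_for_each_entry_safe(cm_id_priv, tmp,
                                 &cm_dev->cm_id_priv_list, cm_dev_list) {
                if (!list_empty(&cm_id_priv->cm_dev_list))
                        list_del(&cm_id_priv->cm_dev_list);
                cm_id_priv->av.port = NULL;
                cm_id_priv->alt_av.port = NULL;
        }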

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 75 ++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2d62c90f9790..f1a24492924f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -209,6 +209,7 @@ struct cm_device {
 	struct ib_device *ib_device;
 	u8 ack_delay;
 	int going_down;
+	struct list_head cm_id_priv_list;
 	struct cm_port *port[];
 };
 
@@ -285,6 +286,8 @@ struct cm_id_private {
 	atomic_t work_count;
 
 	struct rdma_ucm_ece ece;
+
+	struct list_head cm_dev_list;
 };
 
 static void cm_work_handler(struct work_struct *work);
@@ -440,9 +443,28 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
 	cm_id_priv->private_data_len = private_data_len;
 }
 
+static void add_cm_id_to_cm_dev_list(struct cm_id_private *cm_id_priv,
+				     struct cm_device *cm_dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	if (cm_dev->going_down)
+		goto out;
+
+	if (!list_empty(&cm_id_priv->cm_dev_list))
+		list_del(&cm_id_priv->cm_dev_list);
+	list_add_tail(&cm_id_priv->cm_dev_list, &cm_dev->cm_id_priv_list);
+
+out:
+	spin_unlock_irqrestore(&cm.lock, flags);
+}
+
 static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
-			      struct ib_grh *grh, struct cm_av *av)
+			      struct ib_grh *grh,
+			      struct cm_id_private *cm_id_priv)
 {
+	struct cm_av *av = &cm_id_priv->av;
 	struct rdma_ah_attr new_ah_attr;
 	int ret;
 
@@ -462,14 +484,20 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
 	if (ret)
 		return ret;
 
+	add_cm_id_to_cm_dev_list(cm_id_priv, port->cm_dev);
+
 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
 
 static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
-				   struct ib_grh *grh, struct cm_av *av)
+				   struct ib_grh *grh,
+				   struct cm_id_private *cm_id_priv)
 {
+	struct cm_av *av = &cm_id_priv->av;
+
 	av->port = port;
+	add_cm_id_to_cm_dev_list(cm_id_priv, port->cm_dev);
 	av->pkey_index = wc->pkey_index;
 	return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
 				       port->port_num, wc,
@@ -519,11 +547,13 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
 
 static int cm_init_av_by_path(struct sa_path_rec *path,
 			      const struct ib_gid_attr *sgid_attr,
-			      struct cm_av *av)
+			      struct cm_id_private *cm_id_priv,
+			      bool is_priv_av)
 {
 	struct rdma_ah_attr new_ah_attr;
 	struct cm_device *cm_dev;
 	struct cm_port *port;
+	struct cm_av *av;
 	int ret;
 
 	port = get_cm_port_from_path(path, sgid_attr);
@@ -531,6 +561,11 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 		return -EINVAL;
 	cm_dev = port->cm_dev;
 
+	if (!is_priv_av && cm_dev != cm_id_priv->av.port->cm_dev)
+		return -EINVAL;
+
+	av = is_priv_av ? &cm_id_priv->av : &cm_id_priv->alt_av;
+
 	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
 				  be16_to_cpu(path->pkey), &av->pkey_index);
 	if (ret)
@@ -554,6 +589,9 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 
 	av->timeout = path->packet_life_time + 1;
 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
+	if (is_priv_av)
+		add_cm_id_to_cm_dev_list(cm_id_priv, cm_dev);
+
 	return 0;
 }
 
@@ -832,6 +870,7 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
 	spin_lock_init(&cm_id_priv->lock);
 	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->cm_dev_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	refcount_set(&cm_id_priv->refcount, 1);
 
@@ -1133,6 +1172,8 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		cm_id_priv->timewait_info = NULL;
 	}
 
+	if (!list_empty(&cm_id_priv->cm_dev_list))
+		list_del(&cm_id_priv->cm_dev_list);
 	WARN_ON(cm_id_priv->listen_sharecount);
 	WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
 	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
@@ -1500,12 +1541,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 	}
 
 	ret = cm_init_av_by_path(param->primary_path,
-				 param->ppath_sgid_attr, &cm_id_priv->av);
+				 param->ppath_sgid_attr, cm_id_priv, true);
 	if (ret)
 		goto out;
 	if (param->alternate_path) {
 		ret = cm_init_av_by_path(param->alternate_path, NULL,
-					 &cm_id_priv->alt_av);
+					 cm_id_priv, false);
 		if (ret)
 			goto out;
 	}
@@ -2083,7 +2124,7 @@ static int cm_req_handler(struct cm_work *work)
 
 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
 				      work->mad_recv_wc->recv_buf.grh,
-				      &cm_id_priv->av);
+				      cm_id_priv);
 	if (ret)
 		goto destroy;
 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
@@ -2137,7 +2178,7 @@ static int cm_req_handler(struct cm_work *work)
 		sa_path_set_dmac(&work->path[0],
 				 cm_id_priv->av.ah_attr.roce.dmac);
 	work->path[0].hop_limit = grh->hop_limit;
-	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
+	ret = cm_init_av_by_path(&work->path[0], gid_attr, cm_id_priv, true);
 	if (ret) {
 		int err;
 
@@ -2156,7 +2197,7 @@ static int cm_req_handler(struct cm_work *work)
 	}
 	if (cm_req_has_alt_path(req_msg)) {
 		ret = cm_init_av_by_path(&work->path[1], NULL,
-					 &cm_id_priv->alt_av);
+					 cm_id_priv, false);
 		if (ret) {
 			ib_send_cm_rej(&cm_id_priv->id,
 				       IB_CM_REJ_INVALID_ALT_GID,
@@ -3328,12 +3369,12 @@ static int cm_lap_handler(struct cm_work *work)
 
 	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
 				 work->mad_recv_wc->recv_buf.grh,
-				 &cm_id_priv->av);
+				 cm_id_priv);
 	if (ret)
 		goto unlock;
 
 	ret = cm_init_av_by_path(param->alternate_path, NULL,
-				 &cm_id_priv->alt_av);
+				 cm_id_priv, false);
 	if (ret)
 		goto unlock;
 
@@ -3452,7 +3493,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 	ret = cm_init_av_by_path(param->path, param->sgid_attr,
-				 &cm_id_priv->av);
+				 cm_id_priv, true);
 	if (ret)
 		return ret;
 
@@ -3542,7 +3583,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	cm_id_priv->av.dgid.global.interface_id = 0;
 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
 				      work->mad_recv_wc->recv_buf.grh,
-				      &cm_id_priv->av);
+				      cm_id_priv);
 	if (ret)
 		goto out;
 
@@ -4303,6 +4344,7 @@ static int cm_add_one(struct ib_device *ib_device)
 	cm_dev->ib_device = ib_device;
 	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
 	cm_dev->going_down = 0;
+	INIT_LIST_HEAD(&cm_dev->cm_id_priv_list);
 
 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
 	rdma_for_each_port (ib_device, i) {
@@ -4381,6 +4423,7 @@ static int cm_add_one(struct ib_device *ib_device)
 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct cm_device *cm_dev = client_data;
+	struct cm_id_private *cm_id_priv, *tmp;
 	struct cm_port *port;
 	struct ib_port_modify port_modify = {
 		.clr_port_cap_mask = IB_PORT_CM_SUP
@@ -4396,6 +4439,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 	cm_dev->going_down = 1;
 	spin_unlock_irq(&cm.lock);
 
+	list_for_each_entry_safe(cm_id_priv, tmp,
+				 &cm_dev->cm_id_priv_list, cm_dev_list) {
+		if (!list_empty(&cm_id_priv->cm_dev_list))
+			list_del(&cm_id_priv->cm_dev_list);
+		cm_id_priv->av.port = NULL;
+		cm_id_priv->alt_av.port = NULL;
+	}
+
 	rdma_for_each_port (ib_device, i) {
 		if (!rdma_cap_ib_cm(ib_device, i))
 			continue;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when access av/alt_av's port of a cm_id
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
                   ` (6 preceding siblings ...)
  2021-04-21 11:40 ` [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  2021-04-22 19:08   ` Jason Gunthorpe
  2021-04-21 11:40 ` [PATCH rdma-next v2 9/9] IB/cm: Initialize av before aquire the spin lock in cm_lap_handler Leon Romanovsky
  8 siblings, 1 reply; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma

From: Mark Zhang <markzhang@nvidia.com>

Add rwlock protection when accessing the av/alt_av's port pointer.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 134 +++++++++++++++++++++++++++--------
 1 file changed, 106 insertions(+), 28 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index f1a24492924f..28eb8a5ee54e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -262,6 +262,7 @@ struct cm_id_private {
 	/* todo: use alternate port on send failure */
 	struct cm_av av;
 	struct cm_av alt_av;
+	rwlock_t av_rwlock;	/* Do not acquire inside cm.lock */
 
 	void *private_data;
 	__be64 tid;
@@ -303,20 +304,37 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	struct ib_mad_agent *mad_agent;
 	struct ib_mad_send_buf *m;
 	struct ib_ah *ah;
+	int ret;
+
+	read_lock(&cm_id_priv->av_rwlock);
+	if (!cm_id_priv->av.port) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	mad_agent = cm_id_priv->av.port->mad_agent;
+	if (!mad_agent) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
-	if (IS_ERR(ah))
-		return (void *)ah;
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		goto out;
+	}
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC,
 			       IB_MGMT_BASE_VERSION);
+
+	read_unlock(&cm_id_priv->av_rwlock);
 	if (IS_ERR(m)) {
 		rdma_destroy_ah(ah, 0);
-		return m;
+		ret = PTR_ERR(m);
+		goto out;
 	}
 
 	/* Timeout set by caller if response is expected. */
@@ -326,6 +344,10 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	refcount_inc(&cm_id_priv->refcount);
 	m->context[0] = cm_id_priv;
 	return m;
+
+out:
+	read_unlock(&cm_id_priv->av_rwlock);
+	return ERR_PTR(ret);
 }
 
 static void cm_free_msg(struct ib_mad_send_buf *msg)
@@ -455,7 +477,6 @@ static void add_cm_id_to_cm_dev_list(struct cm_id_private *cm_id_priv,
 	if (!list_empty(&cm_id_priv->cm_dev_list))
 		list_del(&cm_id_priv->cm_dev_list);
 	list_add_tail(&cm_id_priv->cm_dev_list, &cm_dev->cm_id_priv_list);
-
 out:
 	spin_unlock_irqrestore(&cm.lock, flags);
 }
@@ -468,8 +489,8 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
 	struct rdma_ah_attr new_ah_attr;
 	int ret;
 
-	av->port = port;
-	av->pkey_index = wc->pkey_index;
+	if (!port)
+		return -EINVAL;
 
 	/*
 	 * av->ah_attr might be initialized based on past wc during incoming
@@ -484,7 +505,11 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
 	if (ret)
 		return ret;
 
+	write_lock(&cm_id_priv->av_rwlock);
+	av->port = port;
+	av->pkey_index = wc->pkey_index;
 	add_cm_id_to_cm_dev_list(cm_id_priv, port->cm_dev);
+	write_unlock(&cm_id_priv->av_rwlock);
 
 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
@@ -496,8 +521,10 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
 {
 	struct cm_av *av = &cm_id_priv->av;
 
+	write_lock(&cm_id_priv->av_rwlock);
 	av->port = port;
 	add_cm_id_to_cm_dev_list(cm_id_priv, port->cm_dev);
+	write_unlock(&cm_id_priv->av_rwlock);
 	av->pkey_index = wc->pkey_index;
 	return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
 				       port->port_num, wc,
@@ -554,15 +581,21 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 	struct cm_device *cm_dev;
 	struct cm_port *port;
 	struct cm_av *av;
-	int ret;
+	int ret = 0;
 
 	port = get_cm_port_from_path(path, sgid_attr);
 	if (!port)
 		return -EINVAL;
 	cm_dev = port->cm_dev;
 
-	if (!is_priv_av && cm_dev != cm_id_priv->av.port->cm_dev)
-		return -EINVAL;
+	read_lock(&cm_id_priv->av_rwlock);
+	if (!is_priv_av &&
+	    (!cm_id_priv->av.port || cm_dev != cm_id_priv->av.port->cm_dev))
+		ret = -EINVAL;
+
+	read_unlock(&cm_id_priv->av_rwlock);
+	if (ret)
+		return ret;
 
 	av = is_priv_av ? &cm_id_priv->av : &cm_id_priv->alt_av;
 
@@ -571,8 +604,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 	if (ret)
 		return ret;
 
-	av->port = port;
-
 	/*
 	 * av->ah_attr might be initialized based on wc or during
 	 * request processing time which might have reference to sgid_attr.
@@ -587,11 +618,15 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 	if (ret)
 		return ret;
 
+	write_lock(&cm_id_priv->av_rwlock);
+	av->port = port;
 	av->timeout = path->packet_life_time + 1;
-	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	if (is_priv_av)
 		add_cm_id_to_cm_dev_list(cm_id_priv, cm_dev);
 
+	write_unlock(&cm_id_priv->av_rwlock);
+
+	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
 
@@ -873,6 +908,7 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
 	INIT_LIST_HEAD(&cm_id_priv->cm_dev_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	refcount_set(&cm_id_priv->refcount, 1);
+	rwlock_init(&cm_id_priv->av_rwlock);
 
 	ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
 			      &cm.local_id_next, GFP_KERNEL);
@@ -986,6 +1022,26 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
 	return min(31, ack_timeout);
 }
 
+static u8 cm_ack_timeout_req(struct cm_id_private *cm_id_priv,
+			     u8 packet_life_time)
+{
+	u8 ack_delay = 0;
+
+	read_lock(&cm_id_priv->av_rwlock);
+	if (cm_id_priv->av.port && cm_id_priv->av.port->cm_dev)
+		ack_delay = cm_id_priv->av.port->cm_dev->ack_delay;
+	read_unlock(&cm_id_priv->av_rwlock);
+
+	return cm_ack_timeout(ack_delay, packet_life_time);
+}
+
+static u8 cm_ack_timeout_rep(struct cm_id_private *cm_id_priv,
+			     u8 packet_life_time)
+{
+	return cm_ack_timeout(cm_id_priv->target_ack_delay,
+			      packet_life_time);
+}
+
 static void cm_remove_remote(struct cm_id_private *cm_id_priv)
 {
 	struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
@@ -1320,9 +1376,13 @@ EXPORT_SYMBOL(ib_cm_insert_listen);
 
 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
 {
-	u64 hi_tid, low_tid;
+	u64 hi_tid = 0, low_tid;
+
+	read_lock(&cm_id_priv->av_rwlock);
+	if (cm_id_priv->av.port && cm_id_priv->av.port->mad_agent)
+		hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+	read_unlock(&cm_id_priv->av_rwlock);
 
-	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
 	low_tid  = (u64)cm_id_priv->id.local_id;
 	return cpu_to_be64(hi_tid | low_tid);
 }
@@ -1426,8 +1486,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 	IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
 		(pri_path->hop_limit <= 1));
 	IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
-		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
-			       pri_path->packet_life_time));
+		cm_ack_timeout_req(cm_id_priv, pri_path->packet_life_time));
 
 	if (alt_path) {
 		bool alt_ext = false;
@@ -1478,8 +1537,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 		IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
 			(alt_path->hop_limit <= 1));
 		IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
-			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
-				       alt_path->packet_life_time));
+			cm_ack_timeout_req(cm_id_priv,
+					   alt_path->packet_life_time));
 	}
 	IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
 
@@ -1820,7 +1879,12 @@ static void cm_format_req_event(struct cm_work *work,
 	param = &work->cm_event.param.req_rcvd;
 	param->listen_id = listen_id;
 	param->bth_pkey = cm_get_bth_pkey(work);
-	param->port = cm_id_priv->av.port->port_num;
+	read_lock(&cm_id_priv->av_rwlock);
+	if (cm_id_priv->av.port)
+		param->port = cm_id_priv->av.port->port_num;
+	else
+		param->port = 0;
+	read_unlock(&cm_id_priv->av_rwlock);
 	param->primary_path = &work->path[0];
 	cm_opa_to_ib_sgid(work, param->primary_path);
 	if (cm_req_has_alt_path(req_msg)) {
@@ -2247,8 +2311,13 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
 	IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
 	IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
 		param->responder_resources);
-	IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
-		cm_id_priv->av.port->cm_dev->ack_delay);
+	read_lock(&cm_id_priv->av_rwlock);
+	if (cm_id_priv->av.port && cm_id_priv->av.port->cm_dev)
+		IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
+			cm_id_priv->av.port->cm_dev->ack_delay);
+	else
+		IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg, 0);
+	read_unlock(&cm_id_priv->av_rwlock);
 	IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
 	IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
 	IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
@@ -2566,11 +2635,9 @@ static int cm_rep_handler(struct cm_work *work)
 	cm_id_priv->target_ack_delay =
 		IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
 	cm_id_priv->av.timeout =
-			cm_ack_timeout(cm_id_priv->target_ack_delay,
-				       cm_id_priv->av.timeout - 1);
+		cm_ack_timeout_rep(cm_id_priv, cm_id_priv->av.timeout - 1);
 	cm_id_priv->alt_av.timeout =
-			cm_ack_timeout(cm_id_priv->target_ack_delay,
-				       cm_id_priv->alt_av.timeout - 1);
+		cm_ack_timeout_rep(cm_id_priv, cm_id_priv->alt_av.timeout - 1);
 
 	ib_cancel_mad(cm_id_priv->msg);
 	cm_queue_work_unlock(cm_id_priv, work);
@@ -4120,7 +4187,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
 						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
-		qp_attr->port_num = cm_id_priv->av.port->port_num;
+		read_lock(&cm_id_priv->av_rwlock);
+		qp_attr->port_num = cm_id_priv->av.port ?
+			cm_id_priv->av.port->port_num : 0;
+		read_unlock(&cm_id_priv->av_rwlock);
 		ret = 0;
 		break;
 	default:
@@ -4164,7 +4234,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
 		}
 		if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
 			*qp_attr_mask |= IB_QP_ALT_PATH;
-			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+			read_lock(&cm_id_priv->av_rwlock);
+			qp_attr->alt_port_num = cm_id_priv->alt_av.port ?
+				cm_id_priv->alt_av.port->port_num : 0;
+			read_unlock(&cm_id_priv->av_rwlock);
 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4223,7 +4296,10 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
 			}
 		} else {
 			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
-			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+			read_lock(&cm_id_priv->av_rwlock);
+			qp_attr->alt_port_num = cm_id_priv->alt_av.port ?
+				cm_id_priv->alt_av.port->port_num : 0;
+			read_unlock(&cm_id_priv->av_rwlock);
 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4441,10 +4517,12 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 
 	list_for_each_entry_safe(cm_id_priv, tmp,
 				 &cm_dev->cm_id_priv_list, cm_dev_list) {
+		write_lock(&cm_id_priv->av_rwlock);
 		if (!list_empty(&cm_id_priv->cm_dev_list))
 			list_del(&cm_id_priv->cm_dev_list);
 		cm_id_priv->av.port = NULL;
 		cm_id_priv->alt_av.port = NULL;
+		write_unlock(&cm_id_priv->av_rwlock);
 	}
 
 	rdma_for_each_port (ib_device, i) {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH rdma-next v2 9/9] IB/cm: Initialize av before aquire the spin lock in cm_lap_handler
  2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
                   ` (7 preceding siblings ...)
  2021-04-21 11:40 ` [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when access av/alt_av's port of a cm_id Leon Romanovsky
@ 2021-04-21 11:40 ` Leon Romanovsky
  8 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-21 11:40 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma

From: Mark Zhang <markzhang@nvidia.com>

Both cm_init_av_for_lap() and cm_init_av_by_path() might sleep and should
not be called while holding a spinlock.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cm.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 28eb8a5ee54e..3feff999a5e0 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3400,6 +3400,17 @@ static int cm_lap_handler(struct cm_work *work)
 	work->cm_event.private_data =
 		IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
 
+	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+				 work->mad_recv_wc->recv_buf.grh,
+				 cm_id_priv);
+	if (ret)
+		goto deref;
+
+	ret = cm_init_av_by_path(param->alternate_path, NULL,
+				 cm_id_priv, false);
+	if (ret)
+		goto deref;
+
 	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
 		goto unlock;
@@ -3434,17 +3445,6 @@ static int cm_lap_handler(struct cm_work *work)
 		goto unlock;
 	}
 
-	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
-				 work->mad_recv_wc->recv_buf.grh,
-				 cm_id_priv);
-	if (ret)
-		goto unlock;
-
-	ret = cm_init_av_by_path(param->alternate_path, NULL,
-				 cm_id_priv, false);
-	if (ret)
-		goto unlock;
-
 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
 	cm_id_priv->tid = lap_msg->hdr.tid;
 	cm_queue_work_unlock(cm_id_priv, work);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when access av/alt_av's port of a cm_id
  2021-04-21 11:40 ` [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when access av/alt_av's port of a cm_id Leon Romanovsky
@ 2021-04-22 19:08   ` Jason Gunthorpe
  2021-04-25 13:21     ` Leon Romanovsky
  0 siblings, 1 reply; 18+ messages in thread
From: Jason Gunthorpe @ 2021-04-22 19:08 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Doug Ledford, Mark Zhang, linux-rdma

On Wed, Apr 21, 2021 at 02:40:38PM +0300, Leon Romanovsky wrote:
> @@ -303,20 +304,37 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
>  	struct ib_mad_agent *mad_agent;
>  	struct ib_mad_send_buf *m;
>  	struct ib_ah *ah;
> +	int ret;
> +
> +	read_lock(&cm_id_priv->av_rwlock);
> +	if (!cm_id_priv->av.port) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
>  
>  	mad_agent = cm_id_priv->av.port->mad_agent;
> +	if (!mad_agent) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
>  	ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
> -	if (IS_ERR(ah))
> -		return (void *)ah;
> +	if (IS_ERR(ah)) {
> +		ret = PTR_ERR(ah);
> +		goto out;
> +	}
>  
>  	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
>  			       cm_id_priv->av.pkey_index,
>  			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
>  			       GFP_ATOMIC,
>  			       IB_MGMT_BASE_VERSION);
> +
> +	read_unlock(&cm_id_priv->av_rwlock);
>  	if (IS_ERR(m)) {
>  		rdma_destroy_ah(ah, 0);
> -		return m;
> +		ret = PTR_ERR(m);
> +		goto out;
>  	}
>  
>  	/* Timeout set by caller if response is expected. */
> @@ -326,6 +344,10 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
>  	refcount_inc(&cm_id_priv->refcount);
>  	m->context[0] = cm_id_priv;
>  	return m;
> +
> +out:
> +	read_unlock(&cm_id_priv->av_rwlock);

This flow has read_unlock happening twice on error
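
One way to keep it balanced (just a sketch of the intended flow, not the
posted patch) is to return directly once the lock has been dropped and
only use the label for paths that still hold it:

	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
			       cm_id_priv->av.pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC, IB_MGMT_BASE_VERSION);
	read_unlock(&cm_id_priv->av_rwlock);
	if (IS_ERR(m)) {
		/* the read lock is already dropped here, so don't goto out */
		rdma_destroy_ah(ah, 0);
		return m;
	}
	[..]
	return m;

out:
	/* only jumped to while the read lock is still held */
	read_unlock(&cm_id_priv->av_rwlock);
	return ERR_PTR(ret);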

Jason

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device
  2021-04-21 11:40 ` [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device Leon Romanovsky
@ 2021-04-22 19:34   ` Jason Gunthorpe
  2021-04-23 13:14     ` Mark Zhang
  0 siblings, 1 reply; 18+ messages in thread
From: Jason Gunthorpe @ 2021-04-22 19:34 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Doug Ledford, Mark Zhang, linux-rdma

On Wed, Apr 21, 2021 at 02:40:37PM +0300, Leon Romanovsky wrote:
> @@ -4396,6 +4439,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
>  	cm_dev->going_down = 1;
>  	spin_unlock_irq(&cm.lock);
>  
> +	list_for_each_entry_safe(cm_id_priv, tmp,
> +				 &cm_dev->cm_id_priv_list, cm_dev_list) {
> +		if (!list_empty(&cm_id_priv->cm_dev_list))
> +			list_del(&cm_id_priv->cm_dev_list);
> +		cm_id_priv->av.port = NULL;
> +		cm_id_priv->alt_av.port = NULL;
> +	}

Ugh, this is in the wrong order, it has to be after the work queue
flush..

Hurm, I didn't see an easy way to fix it up, but I did think of a much
better design!

Generally speaking all we need is the memory of the cm_dev and port to
remain active, we don't need to block or fence with cm_remove_one(),
so just stick a memory kref on this thing and keep the memory. The
only things that need to serialize with cm_remove_one() are on the
workqueue or take a spinlock (eg because they touch mad_agent)

Try this, I didn't finish every detail, applies on top of your series,
but you'll need to reflow it into new commits:

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 3feff999a5e003..c26367006a4485 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -205,8 +205,11 @@ struct cm_port {
 };
 
 struct cm_device {
+	struct kref kref;
 	struct list_head list;
+	struct mutex unregistration_lock;
 	struct ib_device *ib_device;
+	unsigned int num_ports;
 	u8 ack_delay;
 	int going_down;
 	struct list_head cm_id_priv_list;
@@ -262,7 +265,6 @@ struct cm_id_private {
 	/* todo: use alternate port on send failure */
 	struct cm_av av;
 	struct cm_av alt_av;
-	rwlock_t av_rwlock;	/* Do not acquire inside cm.lock */
 
 	void *private_data;
 	__be64 tid;
@@ -287,10 +289,23 @@ struct cm_id_private {
 	atomic_t work_count;
 
 	struct rdma_ucm_ece ece;
-
-	struct list_head cm_dev_list;
 };
 
+static void cm_dev_release(struct kref *kref)
+{
+	struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
+	unsigned int i;
+
+	for (i = 0; i != cm_dev->num_ports; i++)
+		kfree(cm_dev->port[i]);
+	kfree(cm_dev);
+}
+
+static void cm_device_put(struct cm_device *cm_dev)
+{
+	kref_put(&cm_dev->kref, cm_dev_release);
+}
+
 static void cm_work_handler(struct work_struct *work);
 
 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
@@ -306,12 +321,12 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	struct ib_ah *ah;
 	int ret;
 
-	read_lock(&cm_id_priv->av_rwlock);
 	if (!cm_id_priv->av.port) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	spin_lock(&cm_id_priv->av.port.cm_dev->unregistration_lock);
 	mad_agent = cm_id_priv->av.port->mad_agent;
 	if (!mad_agent) {
 		ret = -EINVAL;
@@ -330,7 +345,6 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 			       GFP_ATOMIC,
 			       IB_MGMT_BASE_VERSION);
 
-	read_unlock(&cm_id_priv->av_rwlock);
 	if (IS_ERR(m)) {
 		rdma_destroy_ah(ah, 0);
 		ret = PTR_ERR(m);
@@ -346,7 +360,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
 	return m;
 
 out:
-	read_unlock(&cm_id_priv->av_rwlock);
+	spin_unlock(&cm_id_priv->av.port.cm_dev->unregistration_lock);
 	return ERR_PTR(ret);
 }
 
@@ -465,20 +479,18 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
 	cm_id_priv->private_data_len = private_data_len;
 }
 
-static void add_cm_id_to_cm_dev_list(struct cm_id_private *cm_id_priv,
-				     struct cm_device *cm_dev)
+static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
 {
-	unsigned long flags;
+	struct cm_port *old_port = av->port;
 
-	spin_lock_irqsave(&cm.lock, flags);
-	if (cm_dev->going_down)
-		goto out;
+	if (old_port == port)
+		return;
 
-	if (!list_empty(&cm_id_priv->cm_dev_list))
-		list_del(&cm_id_priv->cm_dev_list);
-	list_add_tail(&cm_id_priv->cm_dev_list, &cm_dev->cm_id_priv_list);
-out:
-	spin_unlock_irqrestore(&cm.lock, flags);
+	av->port = port;
+	if (old_port)
+		cm_device_put(old_port->cm_dev);
+	if (port)
+		kref_get(&old_port->cm_dev->kref);
 }
 
 static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
@@ -505,11 +517,8 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
 	if (ret)
 		return ret;
 
-	write_lock(&cm_id_priv->av_rwlock);
-	av->port = port;
+	cm_set_av_port(av, port);
 	av->pkey_index = wc->pkey_index;
-	add_cm_id_to_cm_dev_list(cm_id_priv, port->cm_dev);
-	write_unlock(&cm_id_priv->av_rwlock);
 
 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
@@ -521,10 +530,7 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
 {
 	struct cm_av *av = &cm_id_priv->av;
 
-	write_lock(&cm_id_priv->av_rwlock);
-	av->port = port;
-	add_cm_id_to_cm_dev_list(cm_id_priv, port->cm_dev);
-	write_unlock(&cm_id_priv->av_rwlock);
+	cm_set_av_port(av, port);
 	av->pkey_index = wc->pkey_index;
 	return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
 				       port->port_num, wc,
@@ -588,12 +594,9 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 		return -EINVAL;
 	cm_dev = port->cm_dev;
 
-	read_lock(&cm_id_priv->av_rwlock);
 	if (!is_priv_av &&
 	    (!cm_id_priv->av.port || cm_dev != cm_id_priv->av.port->cm_dev))
 		ret = -EINVAL;
-
-	read_unlock(&cm_id_priv->av_rwlock);
 	if (ret)
 		return ret;
 
@@ -618,13 +621,8 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
 	if (ret)
 		return ret;
 
-	write_lock(&cm_id_priv->av_rwlock);
-	av->port = port;
+	cm_set_av_port(av, port);
 	av->timeout = path->packet_life_time + 1;
-	if (is_priv_av)
-		add_cm_id_to_cm_dev_list(cm_id_priv, cm_dev);
-
-	write_unlock(&cm_id_priv->av_rwlock);
 
 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
@@ -905,10 +903,8 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
 	spin_lock_init(&cm_id_priv->lock);
 	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
-	INIT_LIST_HEAD(&cm_id_priv->cm_dev_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	refcount_set(&cm_id_priv->refcount, 1);
-	rwlock_init(&cm_id_priv->av_rwlock);
 
 	ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
 			      &cm.local_id_next, GFP_KERNEL);
@@ -1027,10 +1023,8 @@ static u8 cm_ack_timeout_req(struct cm_id_private *cm_id_priv,
 {
 	u8 ack_delay = 0;
 
-	read_lock(&cm_id_priv->av_rwlock);
-	if (cm_id_priv->av.port && cm_id_priv->av.port->cm_dev)
+	if (cm_id_priv->av.port)
 		ack_delay = cm_id_priv->av.port->cm_dev->ack_delay;
-	read_unlock(&cm_id_priv->av_rwlock);
 
 	return cm_ack_timeout(ack_delay, packet_life_time);
 }
@@ -1228,8 +1222,6 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 		cm_id_priv->timewait_info = NULL;
 	}
 
-	if (!list_empty(&cm_id_priv->cm_dev_list))
-		list_del(&cm_id_priv->cm_dev_list);
 	WARN_ON(cm_id_priv->listen_sharecount);
 	WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
 	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
@@ -1246,6 +1238,8 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 	rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
 	rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
 	kfree(cm_id_priv->private_data);
+	cm_set_av_port(&cm_id_priv->av, NULL);
+	cm_set_av_port(&cm_id_priv->alt_av, NULL);
 	kfree_rcu(cm_id_priv, rcu);
 }
 
@@ -1378,10 +1372,13 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
 {
 	u64 hi_tid = 0, low_tid;
 
-	read_lock(&cm_id_priv->av_rwlock);
-	if (cm_id_priv->av.port && cm_id_priv->av.port->mad_agent)
-		hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
-	read_unlock(&cm_id_priv->av_rwlock);
+	if (cm_id_priv->av.port) {
+		spin_lock(&cm_id_priv->av.port->cm_dev->unregistration_lock);
+		if (cm_id_priv->av.port->mad_agent)
+			hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid)
+				 << 32;
+		spin_unlock(&cm_id_priv->av.port->cm_dev->unregistration_lock);
+	}
 
 	low_tid  = (u64)cm_id_priv->id.local_id;
 	return cpu_to_be64(hi_tid | low_tid);
@@ -1879,12 +1876,10 @@ static void cm_format_req_event(struct cm_work *work,
 	param = &work->cm_event.param.req_rcvd;
 	param->listen_id = listen_id;
 	param->bth_pkey = cm_get_bth_pkey(work);
-	read_lock(&cm_id_priv->av_rwlock);
 	if (cm_id_priv->av.port)
 		param->port = cm_id_priv->av.port->port_num;
 	else
 		param->port = 0;
-	read_unlock(&cm_id_priv->av_rwlock);
 	param->primary_path = &work->path[0];
 	cm_opa_to_ib_sgid(work, param->primary_path);
 	if (cm_req_has_alt_path(req_msg)) {
@@ -2311,13 +2306,11 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
 	IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
 	IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
 		param->responder_resources);
-	read_lock(&cm_id_priv->av_rwlock);
 	if (cm_id_priv->av.port && cm_id_priv->av.port->cm_dev)
 		IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
 			cm_id_priv->av.port->cm_dev->ack_delay);
 	else
 		IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg, 0);
-	read_unlock(&cm_id_priv->av_rwlock);
 	IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
 	IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
 	IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
@@ -4187,10 +4180,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
 						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
-		read_lock(&cm_id_priv->av_rwlock);
 		qp_attr->port_num = cm_id_priv->av.port ?
 			cm_id_priv->av.port->port_num : 0;
-		read_unlock(&cm_id_priv->av_rwlock);
 		ret = 0;
 		break;
 	default:
@@ -4234,10 +4225,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
 		}
 		if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
 			*qp_attr_mask |= IB_QP_ALT_PATH;
-			read_lock(&cm_id_priv->av_rwlock);
 			qp_attr->alt_port_num = cm_id_priv->alt_av.port ?
 				cm_id_priv->alt_av.port->port_num : 0;
-			read_unlock(&cm_id_priv->av_rwlock);
 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4296,10 +4285,8 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
 			}
 		} else {
 			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
-			read_lock(&cm_id_priv->av_rwlock);
 			qp_attr->alt_port_num = cm_id_priv->alt_av.port ?
 				cm_id_priv->alt_av.port->port_num : 0;
-			read_unlock(&cm_id_priv->av_rwlock);
 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4417,9 +4404,11 @@ static int cm_add_one(struct ib_device *ib_device)
 	if (!cm_dev)
 		return -ENOMEM;
 
+	kref_init(&cm_dev->kref);
 	cm_dev->ib_device = ib_device;
 	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
 	cm_dev->going_down = 0;
+	cm_dev->num_ports = ib_device->phys_port_cnt;
 	INIT_LIST_HEAD(&cm_dev->cm_id_priv_list);
 
 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
@@ -4489,10 +4478,9 @@ static int cm_add_one(struct ib_device *ib_device)
 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
 		ib_unregister_mad_agent(port->mad_agent);
 		cm_remove_port_fs(port);
-		kfree(port);
 	}
 free:
-	kfree(cm_dev);
+	cm_device_put(cm_dev);
 	return ret;
 }
 
@@ -4515,21 +4503,15 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 	cm_dev->going_down = 1;
 	spin_unlock_irq(&cm.lock);
 
-	list_for_each_entry_safe(cm_id_priv, tmp,
-				 &cm_dev->cm_id_priv_list, cm_dev_list) {
-		write_lock(&cm_id_priv->av_rwlock);
-		if (!list_empty(&cm_id_priv->cm_dev_list))
-			list_del(&cm_id_priv->cm_dev_list);
-		cm_id_priv->av.port = NULL;
-		cm_id_priv->alt_av.port = NULL;
-		write_unlock(&cm_id_priv->av_rwlock);
-	}
-
 	rdma_for_each_port (ib_device, i) {
+		struct ib_mad_agent *mad_agent;
+
 		if (!rdma_cap_ib_cm(ib_device, i))
 			continue;
 
 		port = cm_dev->port[i-1];
+		mad_agent = port->mad_agent;
+
 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
 		/*
 		 * We flush the queue here after the going_down set, this
@@ -4537,12 +4519,20 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 		 * after that we can call the unregister_mad_agent
 		 */
 		flush_workqueue(cm.wq);
-		ib_unregister_mad_agent(port->mad_agent);
+		/*
+		 * The above ensures no call paths from the work are running,
+		 * the remaining paths all take the unregistration lock
+		 */
+		spin_lock(&cm_dev->unregistration_lock);
+		port->mad_agent = NULL;
+		spin_unlock(&cm_dev->unregistration_lock);
+		ib_unregister_mad_agent(mad_agent);
 		cm_remove_port_fs(port);
-		kfree(port);
 	}
 
-	kfree(cm_dev);
+	/* All touches can only be on call path from the work */
+	cm_dev->ib_device = NULL;
+	cm_device_put(cm_dev);
 }
 
 static int __init ib_cm_init(void)


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device
  2021-04-22 19:34   ` Jason Gunthorpe
@ 2021-04-23 13:14     ` Mark Zhang
  2021-04-23 14:24       ` Jason Gunthorpe
  0 siblings, 1 reply; 18+ messages in thread
From: Mark Zhang @ 2021-04-23 13:14 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky; +Cc: Doug Ledford, linux-rdma



On 4/23/2021 3:34 AM, Jason Gunthorpe wrote:
> On Wed, Apr 21, 2021 at 02:40:37PM +0300, Leon Romanovsky wrote:
>> @@ -4396,6 +4439,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
>>   	cm_dev->going_down = 1;
>>   	spin_unlock_irq(&cm.lock);
>>   
>> +	list_for_each_entry_safe(cm_id_priv, tmp,
>> +				 &cm_dev->cm_id_priv_list, cm_dev_list) {
>> +		if (!list_empty(&cm_id_priv->cm_dev_list))
>> +			list_del(&cm_id_priv->cm_dev_list);
>> +		cm_id_priv->av.port = NULL;
>> +		cm_id_priv->alt_av.port = NULL;
>> +	}
> 
> Ugh, this is in the wrong order, it has to be after the work queue
> flush..
> 
> Hurm, I didn't see an easy way to fix it up, but I did think of a much
> better design!
> 
> Generally speaking all we need is the memory of the cm_dev and port to
> remain active, we don't need to block or fence with cm_remove_one(),
> so just stick a memory kref on this thing and keep the memory. The
> only things that need to serialize with cm_remove_one() are on the
> workqueue or take a spinlock (eg because they touch mad_agent)
> 
> Try this, I didn't finish every detail, applies on top of your series,
> but you'll need to reflow it into new commits:

Thanks Jason, I think we still need a rwlock to protect "av->port"? It 
is modified and cleared by cm_set_av_port() and read in many places.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device
  2021-04-23 13:14     ` Mark Zhang
@ 2021-04-23 14:24       ` Jason Gunthorpe
  2021-04-24  2:33         ` Mark Zhang
  0 siblings, 1 reply; 18+ messages in thread
From: Jason Gunthorpe @ 2021-04-23 14:24 UTC (permalink / raw)
  To: Mark Zhang; +Cc: Leon Romanovsky, Doug Ledford, linux-rdma

On Fri, Apr 23, 2021 at 09:14:21PM +0800, Mark Zhang wrote:
> 
> 
> On 4/23/2021 3:34 AM, Jason Gunthorpe wrote:
> > On Wed, Apr 21, 2021 at 02:40:37PM +0300, Leon Romanovsky wrote:
> > > @@ -4396,6 +4439,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
> > >   	cm_dev->going_down = 1;
> > >   	spin_unlock_irq(&cm.lock);
> > > +	list_for_each_entry_safe(cm_id_priv, tmp,
> > > +				 &cm_dev->cm_id_priv_list, cm_dev_list) {
> > > +		if (!list_empty(&cm_id_priv->cm_dev_list))
> > > +			list_del(&cm_id_priv->cm_dev_list);
> > > +		cm_id_priv->av.port = NULL;
> > > +		cm_id_priv->alt_av.port = NULL;
> > > +	}
> > 
> > Ugh, this is in the wrong order, it has to be after the work queue
> > flush..
> > 
> > Hurm, I didn't see an easy way to fix it up, but I did think of a much
> > better design!
> > 
> > Generally speaking all we need is the memory of the cm_dev and port to
> > remain active, we don't need to block or fence with cm_remove_one(),
> > so just stick a memory kref on this thing and keep the memory. The
> > only things that need to serialize with cm_remove_one() are on the
> > workqueue or take a spinlock (eg because they touch mad_agent)
> > 
> > Try this, I didn't finish every detail, applies on top of your series,
> > but you'll need to reflow it into new commits:
> 
> Thanks Jason, I think we still need a rwlock to protect "av->port"? It is
> modified and cleared by cm_set_av_port() and read in many places.

Hum..

This is a real mess.

It looks to me like any access to the av->port should always be
protected by the cm_id_priv->lock

Most already are, but the sets are wrong and a couple readers are wrong

Set reverse call chains:

cm_init_av_for_lap()
 cm_lap_handler(work) (ok)

cm_init_av_for_response()
 cm_req_handler(work) (OK, cm_id_priv is on stack)
 cm_sidr_req_handler(work) (OK, cm_id_priv is on stack)

cm_init_av_by_path()
 cm_req_handler(work) (OK, cm_id_priv is on stack)
 cm_lap_handler(work) (OK)
 ib_send_cm_req() (not locked)
   cma_connect_ib()
    rdma_connect_locked()
     [..]
   ipoib_cm_send_req()
   srp_send_req()
     srp_connect_ch()
      [..]
 ib_send_cm_sidr_req() (not locked)
  cma_resolve_ib_udp()
   rdma_connect_locked()

And
  cm_destroy_id() (locked)

And read reverse call chains:

cm_alloc_msg()
 ib_send_cm_req() (not locked)
 ib_send_cm_rep() (OK)
 ib_send_cm_rtu() (OK)
 cm_send_dreq_locked() (OK)
 cm_send_drep_locked() (OK)
 cm_send_rej_locked() (OK)
 ib_send_cm_mra() (OK)
 ib_send_cm_sidr_req() (not locked)
 cm_send_sidr_rep_locked() (OK)
cm_form_tid()
 cm_format_req()
  ib_send_cm_req() (sort of OK)
 cm_format_dreq()
   cm_send_dreq_locked (OK)
cm_format_req()
  ib_send_cm_req() (sort of OK)
cm_format_req_event()
 cm_req_handler() (OK, cm_id_priv is on stack)
cm_format_rep()
 ib_send_cm_rep() (OK)
cm_rep_handler(work) (OK)
cm_establish_handler(work) (OK)
cm_rtu_handler(work) (OK)
cm_send_dreq_locked() (OK)
cm_dreq_handler(work) (OK)
cm_drep_handler(work) (OK)
cm_rej_handler(work) (OK)
cm_mra_handler(work) (OK)
cm_apr_handler(work) (OK)
cm_sidr_rep_handler(work) (OK)
cm_init_qp_init_attr() (OK)
cm_init_qp_rtr_attr() (OK)
cm_init_qp_rts_attr() (OK)

So.. That leaves these functions that are not obviously locked
correctly:
 ib_send_cm_req()
 ib_send_cm_sidr_req()

And the way their locking is expected to work is basically that there
are no parallel touches to the cm_id - however I'm doubtful this is
completely true.

So no new lock needed, but something should be done about the above
two functions, and this should be documented
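
For the documentation part, one way to spell the rule out next to
struct cm_av could be something like this (wording is only a
suggestion):

	/*
	 * Locking: av.port and alt_av.port may only be changed while
	 * holding cm_id_priv->lock, or before the cm_id is visible to
	 * any other thread. ib_send_cm_req() and ib_send_cm_sidr_req()
	 * rely on the caller not touching the cm_id concurrently.
	 */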

Jason

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device
  2021-04-23 14:24       ` Jason Gunthorpe
@ 2021-04-24  2:33         ` Mark Zhang
  2021-04-26 13:56           ` Jason Gunthorpe
  0 siblings, 1 reply; 18+ messages in thread
From: Mark Zhang @ 2021-04-24  2:33 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Leon Romanovsky, Doug Ledford, linux-rdma



On 4/23/2021 10:24 PM, Jason Gunthorpe wrote:
> On Fri, Apr 23, 2021 at 09:14:21PM +0800, Mark Zhang wrote:
>>
>>
>> On 4/23/2021 3:34 AM, Jason Gunthorpe wrote:
>>> On Wed, Apr 21, 2021 at 02:40:37PM +0300, Leon Romanovsky wrote:
>>>> @@ -4396,6 +4439,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
>>>>    	cm_dev->going_down = 1;
>>>>    	spin_unlock_irq(&cm.lock);
>>>> +	list_for_each_entry_safe(cm_id_priv, tmp,
>>>> +				 &cm_dev->cm_id_priv_list, cm_dev_list) {
>>>> +		if (!list_empty(&cm_id_priv->cm_dev_list))
>>>> +			list_del(&cm_id_priv->cm_dev_list);
>>>> +		cm_id_priv->av.port = NULL;
>>>> +		cm_id_priv->alt_av.port = NULL;
>>>> +	}
>>>
>>> Ugh, this is in the wrong order, it has to be after the work queue
>>> flush..
>>>
>>> Hurm, I didn't see an easy way to fix it up, but I did think of a much
>>> better design!
>>>
>>> Generally speaking all we need is the memory of the cm_dev and port to
>>> remain active, we don't need to block or fence with cm_remove_one(),
>>> so just stick a memory kref on this thing and keep the memory. The
>>> only things that need to serialize with cm_remove_one() are on the
>>> workqueue or take a spinlock (eg because they touch mad_agent)
>>>
>>> Try this, I didn't finish every detail, applies on top of your series,
>>> but you'll need to reflow it into new commits:
>>
>> Thanks Jason, I think we still need a rwlock to protect "av->port"? It is
>> modified and cleared by cm_set_av_port() and read in many places.
> 
> Hum..
> 
> This is a real mess.
> 
> It looks to me like any access to the av->port should always be
> protected by the cm_id_priv->lock
> 
> Most already are, but the sets are wrong and a couple readers are wrong
> 
> Set reverse call chains:
> 
> cm_init_av_for_lap()
>   cm_lap_handler(work) (ok)
> 
> cm_init_av_for_response()
>   cm_req_handler(work) (OK, cm_id_priv is on stack)
>   cm_sidr_req_handler(work) (OK, cm_id_priv is on stack)
> 
> cm_init_av_by_path()
>   cm_req_handler(work) (OK, cm_id_priv is on stack)
>   cm_lap_handler(work) (OK)
>   ib_send_cm_req() (not locked)
>     cma_connect_ib()
>      rdma_connect_locked()
>       [..]
>     ipoib_cm_send_req()
>     srp_send_req()
>       srp_connect_ch()
>        [..]
>   ib_send_cm_sidr_req() (not locked)
>    cma_resolve_ib_udp()
>     rdma_connect_locked()
> 

Both cm_init_av_for_lap() and cm_init_av_by_path() might sleep (the last
patch of this series tries to fix this issue), so they cannot be called
while holding cm_id_priv->lock


> And
>    cm_destroy_id() (locked)
> 
> And read reverse call chains:
> 
> cm_alloc_msg()
>   ib_send_cm_req() (not locked)
>   ib_send_cm_rep() (OK)
>   ib_send_cm_rtu() (OK)
>   cm_send_dreq_locked() (OK)
>   cm_send_drep_locked() (OK)
>   cm_send_rej_locked() (OK)
>   ib_send_cm_mra() (OK)
>   ib_send_cm_sidr_req() (not locked)
>   cm_send_sidr_rep_locked() (OK)
> cm_form_tid()
>   cm_format_req()
>    ib_send_cm_req() (sort of OK)
>   cm_format_dreq()
>     cm_send_dreq_locked (OK)
> cm_format_req()
>    ib_send_cm_req() (sort of OK)
> cm_format_req_event()
>   cm_req_handler() (OK, cm_id_priv is on stack)
> cm_format_rep()
>   ib_send_cm_rep() (OK)
> cm_rep_handler(work) (OK)
> cm_establish_handler(work) (OK)
> cm_rtu_handler(work) (OK)
> cm_send_dreq_locked() (OK)
> cm_dreq_handler(work) (OK)
> cm_drep_handler(work) (OK)
> cm_rej_handler(work) (OK)
> cm_mra_handler(work) (OK)
> cm_apr_handler(work) (OK)
> cm_sidr_rep_handler(work) (OK)
> cm_init_qp_init_attr() (OK)
> cm_init_qp_rtr_attr() (OK)
> cm_init_qp_rts_attr() (OK)
> 
> So.. That leaves these functions that are not obviously locked
> correctly:
>   ib_send_cm_req()
>   ib_send_cm_sidr_req()
> 
> And the way their locking expects to work is basically because they
> expect that there are not parallel touches to the cm_id - however I'm
> doubtful this is completely true.
> 
> So no new lock needed, but something should be done about the above
> two functions, and this should be documented
> 
> Jason
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when access av/alt_av's port of a cm_id
  2021-04-22 19:08   ` Jason Gunthorpe
@ 2021-04-25 13:21     ` Leon Romanovsky
  0 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2021-04-25 13:21 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, Mark Zhang, linux-rdma

On Thu, Apr 22, 2021 at 04:08:14PM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 21, 2021 at 02:40:38PM +0300, Leon Romanovsky wrote:
> > @@ -303,20 +304,37 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
> >  	struct ib_mad_agent *mad_agent;
> >  	struct ib_mad_send_buf *m;
> >  	struct ib_ah *ah;
> > +	int ret;
> > +
> > +	read_lock(&cm_id_priv->av_rwlock);
> > +	if (!cm_id_priv->av.port) {
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
> >  
> >  	mad_agent = cm_id_priv->av.port->mad_agent;
> > +	if (!mad_agent) {
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
> > +
> >  	ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
> > -	if (IS_ERR(ah))
> > -		return (void *)ah;
> > +	if (IS_ERR(ah)) {
> > +		ret = PTR_ERR(ah);
> > +		goto out;
> > +	}
> >  
> >  	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
> >  			       cm_id_priv->av.pkey_index,
> >  			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
> >  			       GFP_ATOMIC,
> >  			       IB_MGMT_BASE_VERSION);
> > +
> > +	read_unlock(&cm_id_priv->av_rwlock);
> >  	if (IS_ERR(m)) {
> >  		rdma_destroy_ah(ah, 0);
> > -		return m;
> > +		ret = PTR_ERR(m);
> > +		goto out;
> >  	}
> >  
> >  	/* Timeout set by caller if response is expected. */
> > @@ -326,6 +344,10 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
> >  	refcount_inc(&cm_id_priv->refcount);
> >  	m->context[0] = cm_id_priv;
> >  	return m;
> > +
> > +out:
> > +	read_unlock(&cm_id_priv->av_rwlock);
> 
> This flow has read_unlock happening twice on error

Ohh, sorry, I will fix.

Thanks

> 
> Jason

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device
  2021-04-24  2:33         ` Mark Zhang
@ 2021-04-26 13:56           ` Jason Gunthorpe
  2021-04-27  1:59             ` Mark Zhang
  0 siblings, 1 reply; 18+ messages in thread
From: Jason Gunthorpe @ 2021-04-26 13:56 UTC (permalink / raw)
  To: Mark Zhang; +Cc: Leon Romanovsky, Doug Ledford, linux-rdma

On Sat, Apr 24, 2021 at 10:33:13AM +0800, Mark Zhang wrote:
> > 
> > Set reverse call chains:
> > 
> > cm_init_av_for_lap()
> >   cm_lap_handler(work) (ok)
> > 
> > cm_init_av_for_response()
> >   cm_req_handler(work) (OK, cm_id_priv is on stack)
> >   cm_sidr_req_handler(work) (OK, cm_id_priv is on stack)
> > 
> > cm_init_av_by_path()
> >   cm_req_handler(work) (OK, cm_id_priv is on stack)
> >   cm_lap_handler(work) (OK)
> >   ib_send_cm_req() (not locked)
> >     cma_connect_ib()
> >      rdma_connect_locked()
> >       [..]
> >     ipoib_cm_send_req()
> >     srp_send_req()
> >       srp_connect_ch()
> >        [..]
> >   ib_send_cm_sidr_req() (not locked)
> >    cma_resolve_ib_udp()
> >     rdma_connect_locked()
> > 
> 
> Both cm_init_av_for_lap() 

Well, it is wrong today, look at cm_lap_handler():

	spin_lock_irq(&cm_id_priv->lock);
	[..]
	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
				 work->mad_recv_wc->recv_buf.grh,
				 &cm_id_priv->av);
	[..]
	cm_queue_work_unlock(cm_id_priv, work);

These need to be restructured, the sleeping calls to extract the
new_ah_attr have to be done before we go into the spinlock.

That is probably the general solution to all the cases, do some work
before the lock and then copy from the stack to the memory under the
spinlock.
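
A rough sketch of that shape for the LAP case (illustrative only, not a
tested patch):

	struct rdma_ah_attr new_ah_attr;
	int ret;

	/* sleeping work, done with no spinlock held */
	ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device,
				      work->port->port_num,
				      work->mad_recv_wc->wc,
				      work->mad_recv_wc->recv_buf.grh,
				      &new_ah_attr);
	if (ret)
		return ret;

	spin_lock_irq(&cm_id_priv->lock);
	/* [..] state checks [..] */
	cm_id_priv->av.port = work->port;
	cm_id_priv->av.pkey_index = work->mad_recv_wc->wc->pkey_index;
	/* rdma_move_ah_attr() only moves pointers, no sleeping here */
	rdma_move_ah_attr(&cm_id_priv->av.ah_attr, &new_ah_attr);
	cm_queue_work_unlock(cm_id_priv, work);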

Jason

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device
  2021-04-26 13:56           ` Jason Gunthorpe
@ 2021-04-27  1:59             ` Mark Zhang
  0 siblings, 0 replies; 18+ messages in thread
From: Mark Zhang @ 2021-04-27  1:59 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Leon Romanovsky, Doug Ledford, linux-rdma



On 4/26/2021 9:56 PM, Jason Gunthorpe wrote:
> On Sat, Apr 24, 2021 at 10:33:13AM +0800, Mark Zhang wrote:
>>>
>>> Set reverse call chains:
>>>
>>> cm_init_av_for_lap()
>>>    cm_lap_handler(work) (ok)
>>>
>>> cm_init_av_for_response()
>>>    cm_req_handler(work) (OK, cm_id_priv is on stack)
>>>    cm_sidr_req_handler(work) (OK, cm_id_priv is on stack)
>>>
>>> cm_init_av_by_path()
>>>    cm_req_handler(work) (OK, cm_id_priv is on stack)
>>>    cm_lap_handler(work) (OK)
>>>    ib_send_cm_req() (not locked)
>>>      cma_connect_ib()
>>>       rdma_connect_locked()
>>>        [..]
>>>      ipoib_cm_send_req()
>>>      srp_send_req()
>>>        srp_connect_ch()
>>>         [..]
>>>    ib_send_cm_sidr_req() (not locked)
>>>     cma_resolve_ib_udp()
>>>      rdma_connect_locked()
>>>
>>
>> Both cm_init_av_for_lap()
> 
> Well, it is wrong today, look at cm_lap_handler():
> 
> 	spin_lock_irq(&cm_id_priv->lock);
> 	[..]
> 	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
> 				 work->mad_recv_wc->recv_buf.grh,
> 				 &cm_id_priv->av);
> 	[..]
> 	cm_queue_work_unlock(cm_id_priv, work);
> 
> These need to be restructured, the sleeping calls to extract the
> new_ah_attr have to be done before we go into the spinlock.
> 
> That is probably the general solution to all the cases, do some work
> before the lock and then copy from the stack to the memory under the
> spinlock.

Maybe we can call cm_set_av_port(av, port) outside of cm_init_av_*, so
that we can take cm_id_priv->lock where needed?
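
Something along these lines in cm_lap_handler(), say (a hypothetical
sketch; it assumes cm_init_av_for_lap() goes back to filling a
caller-provided struct cm_av, and it leaves the kref accounting for the
stack copy out):

	struct cm_av av = {};
	int ret;

	/* sleeping init into a stack copy, no locks held */
	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
				 work->mad_recv_wc->recv_buf.grh, &av);
	if (ret)
		goto deref;

	spin_lock_irq(&cm_id_priv->lock);
	/* [..] state checks [..] */
	cm_set_av_port(&cm_id_priv->av, av.port);
	cm_id_priv->av.pkey_index = av.pkey_index;
	rdma_move_ah_attr(&cm_id_priv->av.ah_attr, &av.ah_attr);
	cm_queue_work_unlock(cm_id_priv, work);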

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread  [~2021-04-27  2:00 UTC | newest]

Thread overview: 18+ messages
2021-04-21 11:40 [PATCH rdma-next v2 0/9] Fix memory corruption in CM Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 1/9] IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg() Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 2/9] IB/cm: Split cm_alloc_msg() Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 3/9] IB/cm: Call the correct message free functions in cm_send_handler() Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 4/9] IB/cm: Tidy remaining cm_msg free paths Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 5/9] Revert "IB/cm: Mark stale CM id's whenever the mad agent was unregistered" Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 6/9] IB/cm: Simplify ib_cancel_mad() and ib_modify_mad() calls Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 7/9] IB/cm: Clear all associated AV's ports when remove a cm device Leon Romanovsky
2021-04-22 19:34   ` Jason Gunthorpe
2021-04-23 13:14     ` Mark Zhang
2021-04-23 14:24       ` Jason Gunthorpe
2021-04-24  2:33         ` Mark Zhang
2021-04-26 13:56           ` Jason Gunthorpe
2021-04-27  1:59             ` Mark Zhang
2021-04-21 11:40 ` [PATCH rdma-next v2 8/9] IB/cm: Add lock protection when access av/alt_av's port of a cm_id Leon Romanovsky
2021-04-22 19:08   ` Jason Gunthorpe
2021-04-25 13:21     ` Leon Romanovsky
2021-04-21 11:40 ` [PATCH rdma-next v2 9/9] IB/cm: Initialize av before aquire the spin lock in cm_lap_handler Leon Romanovsky
