All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bob Pearson <rpearsonhpe@gmail.com>
To: jgg@nvidia.com, zyjzyj2000@gmail.com, linux-rdma@vger.kernel.org
Cc: Bob Pearson <rpearsonhpe@gmail.com>
Subject: [RFC PATCH v9 24/26] RDMA/rxe: Add wait_for_completion to pool objects
Date: Thu, 27 Jan 2022 15:37:53 -0600	[thread overview]
Message-ID: <20220127213755.31697-25-rpearsonhpe@gmail.com> (raw)
In-Reply-To: <20220127213755.31697-1-rpearsonhpe@gmail.com>

Reference counting for object deletion can require an object to
wait for something else to happen before it can be deleted.
The destroy verbs can then return to rdma-core with the object still
holding references. Adding a wait_for_completion in this path
prevents this.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c    |  1 +
 drivers/infiniband/sw/rxe/rxe_mw.c    |  3 +-
 drivers/infiniband/sw/rxe/rxe_pool.c  | 79 ++++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_pool.h  |  4 ++
 drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ++++
 5 files changed, 84 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 35628b8a00b4..6d1ce05bcf65 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -692,6 +692,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 	mr->state = RXE_MR_STATE_INVALID;
 	rxe_drop_ref(mr_pd(mr));
 	rxe_drop_ref(mr);
+	rxe_wait(mr);
 
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 7df36c40eec2..dd3d02db3d03 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -60,8 +60,9 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
 	rxe_do_dealloc_mw(mw);
 	spin_unlock_bh(&mw->lock);
 
-	rxe_drop_ref(mw);
 	rxe_drop_ref(pd);
+	rxe_drop_ref(mw);
+	rxe_wait(mw);
 
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 18cdf5e0ad4e..5402dae01554 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -7,6 +7,7 @@
 
 #include "rxe.h"
 
+#define RXE_POOL_TIMEOUT	(200)
 #define RXE_POOL_ALIGN		(16)
 
 static const struct rxe_type_info {
@@ -154,6 +155,7 @@ void *rxe_alloc(struct rxe_pool *pool)
 	elem->pool = pool;
 	elem->obj = obj;
 	kref_init(&elem->ref_cnt);
+	init_completion(&elem->complete);
 
 	err = xa_alloc_cyclic_bh(&pool->xa, &elem->index, elem, pool->limit,
 			&pool->next, GFP_KERNEL);
@@ -185,6 +187,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
 	elem->pool = pool;
 	elem->obj = (u8 *)elem - pool->elem_offset;
 	kref_init(&elem->ref_cnt);
+	init_completion(&elem->complete);
 
 	err = xa_alloc_cyclic_bh(&pool->xa, &elem->index, elem, pool->limit,
 			&pool->next, GFP_KERNEL);
@@ -212,31 +215,22 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 	return obj;
 }
 
-static void rxe_obj_free_rcu(struct rcu_head *rcu)
-{
-	struct rxe_pool_elem *elem = container_of(rcu, typeof(*elem), rcu);
-
-	kfree(elem->obj);
-}
-
 static void __rxe_elem_release_rcu(struct kref *kref)
 	__releases(&pool->xa.xa_lock)
 {
-	struct rxe_pool_elem *elem = container_of(kref,
-					struct rxe_pool_elem, ref_cnt);
+	struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
 	struct rxe_pool *pool = elem->pool;
 
 	__xa_erase(&pool->xa, elem->index);
 
-	spin_unlock(&pool->xa.xa_lock);
+	spin_unlock_bh(&pool->xa.xa_lock);
 
 	if (pool->cleanup)
 		pool->cleanup(elem);
 
 	atomic_dec(&pool->num_elem);
 
-	if (pool->flags & RXE_POOL_ALLOC)
-		call_rcu(&elem->rcu, rxe_obj_free_rcu);
+	complete(&elem->complete);
 }
 
 int __rxe_add_ref(struct rxe_pool_elem *elem)
@@ -244,8 +238,67 @@ int __rxe_add_ref(struct rxe_pool_elem *elem)
 	return kref_get_unless_zero(&elem->ref_cnt);
 }
 
+static bool refcount_dec_and_lock_bh(refcount_t *r, spinlock_t *lock)
+	__acquires(lock) __releases(lock)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	spin_lock_bh(lock);
+	if (!refcount_dec_and_test(r)) {
+		spin_unlock_bh(lock);
+		return false;
+	}
+
+	return true;
+}
+
+static int kref_put_lock_bh(struct kref *kref,
+				void (*release)(struct kref *kref),
+				spinlock_t *lock)
+{
+	if (refcount_dec_and_lock_bh(&kref->refcount, lock)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
 int __rxe_drop_ref(struct rxe_pool_elem *elem)
 {
-	return kref_put_lock(&elem->ref_cnt, __rxe_elem_release_rcu,
+	return kref_put_lock_bh(&elem->ref_cnt, __rxe_elem_release_rcu,
 			&elem->pool->xa.xa_lock);
 }
+
+static void rxe_obj_free_rcu(struct rcu_head *rcu)
+{
+	struct rxe_pool_elem *elem = container_of(rcu, typeof(*elem), rcu);
+
+	kfree(elem->obj);
+}
+
+int __rxe_wait(struct rxe_pool_elem *elem)
+{
+	struct rxe_pool *pool = elem->pool;
+	static int timeout = RXE_POOL_TIMEOUT;
+	static int timeout_failures;
+	int ret = 0;	/* remains 0 when waiting has been disabled */
+
+	if (timeout) {
+		ret = wait_for_completion_timeout(&elem->complete, timeout);
+		if (!ret) {
+			if (timeout_failures++ == 5) {
+				timeout = 0;
+				pr_warn("Exceeded max completion timeouts. Disabling wait_for_completion\n");
+			} else {
+				pr_warn_ratelimited("Timed out waiting for %s#%d to complete\n",
+					pool->name + 4, elem->index);
+			}
+		}
+	}
+
+	if (pool->flags & RXE_POOL_ALLOC)
+		call_rcu(&elem->rcu, rxe_obj_free_rcu);
+
+	return ret;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 40026d746563..f085750c4c5a 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -29,6 +29,7 @@ struct rxe_pool_elem {
 	struct kref		ref_cnt;
 	struct list_head	list;
 	struct rcu_head		rcu;
+	struct completion	complete;
 	u32			index;
 };
 
@@ -67,4 +68,7 @@ int __rxe_add_ref(struct rxe_pool_elem *elem);
 int __rxe_drop_ref(struct rxe_pool_elem *elem);
 #define rxe_drop_ref(obj) __rxe_drop_ref(&(obj)->elem)
 
+int __rxe_wait(struct rxe_pool_elem *elem);
+#define rxe_wait(obj) __rxe_wait(&(obj)->elem)
+
 #endif /* RXE_POOL_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 3ca374f1cf9b..f2c1037696c5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -116,6 +116,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 	struct rxe_ucontext *uc = to_ruc(ibuc);
 
 	rxe_drop_ref(uc);
+	rxe_wait(uc);
 }
 
 static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -150,6 +151,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct rxe_pd *pd = to_rpd(ibpd);
 
 	rxe_drop_ref(pd);
+	rxe_wait(pd);
 	return 0;
 }
 
@@ -189,6 +191,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
 					 sizeof(uresp->ah_num));
 		if (err) {
 			rxe_drop_ref(ah);
+			rxe_wait(ah);
 			return -EFAULT;
 		}
 	} else if (ah->is_user) {
@@ -229,6 +232,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
 	struct rxe_ah *ah = to_rah(ibah);
 
 	rxe_drop_ref(ah);
+	rxe_wait(ah);
 	return 0;
 }
 
@@ -315,6 +319,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 err2:
 	rxe_drop_ref(pd);
 	rxe_drop_ref(srq);
+	rxe_wait(srq);
 err1:
 	return err;
 }
@@ -373,6 +378,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 
 	rxe_drop_ref(srq->pd);
 	rxe_drop_ref(srq);
+	rxe_wait(srq);
 	return 0;
 }
 
@@ -442,6 +448,7 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
 
 qp_init:
 	rxe_drop_ref(qp);
+	rxe_wait(qp);
 	return err;
 }
 
@@ -496,6 +503,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
 	rxe_qp_destroy(qp);
 	rxe_drop_ref(qp);
+	rxe_wait(qp);
 	return 0;
 }
 
@@ -807,6 +815,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 	rxe_cq_disable(cq);
 
 	rxe_drop_ref(cq);
+	rxe_wait(cq);
 	return 0;
 }
 
@@ -932,6 +941,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 err3:
 	rxe_drop_ref(pd);
 	rxe_drop_ref(mr);
+	rxe_wait(mr);
 err2:
 	return ERR_PTR(err);
 }
@@ -964,6 +974,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 err2:
 	rxe_drop_ref(pd);
 	rxe_drop_ref(mr);
+	rxe_wait(mr);
 err1:
 	return ERR_PTR(err);
 }
-- 
2.32.0


  parent reply	other threads:[~2022-01-27 21:38 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-27 21:37 [RFC PATCH v9 00/26] Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 01/26] RDMA/rxe: Move rxe_mcast_add/delete to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 02/26] RDMA/rxe: Move rxe_mcast_attach/detach " Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 03/26] RDMA/rxe: Rename rxe_mc_grp and rxe_mc_elem Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 04/26] RDMA/rxe: Enforce IBA o10-2.2.3 Bob Pearson
2022-01-28 12:53   ` Jason Gunthorpe
2022-01-28 16:18     ` Bob Pearson
2022-01-28 16:42       ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 05/26] RDMA/rxe: Remove rxe_drop_all_macst_groups Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 06/26] RDMA/rxe: Remove qp->grp_lock and qp->grp_list Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 07/26] RDMA/rxe: Use kzmalloc/kfree for mca Bob Pearson
2022-01-28 18:00   ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 08/26] RDMA/rxe: Rename grp to mcg and mce to mca Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 09/26] RDMA/rxe: Introduce RXECB(skb) Bob Pearson
2022-01-28 18:29   ` Jason Gunthorpe
2022-01-30 17:47     ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 10/26] RDMA/rxe: Split rxe_rcv_mcast_pkt into two phases Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 11/26] RDMA/rxe: Replace locks by rxe->mcg_lock Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 12/26] RDMA/rxe: Replace pool key by rxe->mcg_tree Bob Pearson
2022-01-28 18:32   ` Jason Gunthorpe
2022-01-30 23:23     ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 13/26] RDMA/rxe: Remove key'ed object support Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 14/26] RDMA/rxe: Remove mcg from rxe pools Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 15/26] RDMA/rxe: Add code to cleanup mcast memory Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 16/26] RDMA/rxe: Add comments to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 17/26] RDMA/rxe: Separate code into subroutines Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 18/26] RDMA/rxe: Convert mca read locking to RCU Bob Pearson
2022-01-28 18:39   ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 19/26] RDMA/rxe: Reverse the sense of RXE_POOL_NO_ALLOC Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 20/26] RDMA/rxe: Delete _locked() APIs for pool objects Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 21/26] RDMA/rxe: Replace obj by elem in declaration Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 22/26] RDMA/rxe: Replace red-black trees by xarrays Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 23/26] RDMA/rxe: Change pool locking to RCU Bob Pearson
2022-01-27 21:37 ` Bob Pearson [this message]
2022-01-28  3:58   ` [RFC PATCH v9 24/26] RDMA/rxe: Add wait_for_completion to pool objects kernel test robot
2022-01-28  3:58     ` kernel test robot
2022-01-27 21:37 ` [RFC PATCH v9 25/26] RDMA/rxe: Fix ref error in rxe_av.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 26/26] RDMA/rxe: Replace mr by rkey in responder resources Bob Pearson
2022-01-28 18:42 ` [RFC PATCH v9 00/26] Jason Gunthorpe
2022-02-07 19:20   ` Bob Pearson
2022-02-07 19:38     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220127213755.31697-25-rpearsonhpe@gmail.com \
    --to=rpearsonhpe@gmail.com \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.