All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bob Pearson <rpearsonhpe@gmail.com>
To: jgg@nvidia.com, zyjzyj2000@gmail.com, linux-rdma@vger.kernel.org
Cc: Bob Pearson <rpearsonhpe@gmail.com>
Subject: [RFC PATCH v9 10/26] RDMA/rxe: Split rxe_rcv_mcast_pkt into two phases
Date: Thu, 27 Jan 2022 15:37:39 -0600	[thread overview]
Message-ID: <20220127213755.31697-11-rpearsonhpe@gmail.com> (raw)
In-Reply-To: <20220127213755.31697-1-rpearsonhpe@gmail.com>

Currently rxe_rcv_mcast_pkt performs most of its work while holding
mcg->mcg_lock, and calls into rxe_rcv, which queues the packets to the
responder and completer tasklets with the lock still held — a very bad
idea. This patch walks the qp_list in mcg and copies the qp pointers
into a dynamically allocated array while holding the lock, but performs
the rest of the work without it. The critical section is now very
small.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mcast.c | 11 +++++----
 drivers/infiniband/sw/rxe/rxe_recv.c  | 33 +++++++++++++++++++++++----
 drivers/infiniband/sw/rxe/rxe_verbs.h |  2 +-
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index ed1b9ca65da3..3b66019fc26d 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -113,16 +113,16 @@ static int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 		}
 	}
 
-	if (mcg->num_qp >= rxe->attr.max_mcast_qp_attach) {
+	if (atomic_read(&mcg->qp_num) >= rxe->attr.max_mcast_qp_attach) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	mcg->num_qp++;
+	atomic_inc(&mcg->qp_num);
 	new_mca->qp = qp;
 	atomic_inc(&qp->mcg_num);
 
-	list_add(&new_mca->qp_list, &mcg->qp_list);
+	list_add_tail(&new_mca->qp_list, &mcg->qp_list);
 
 	err = 0;
 out:
@@ -135,6 +135,7 @@ static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 {
 	struct rxe_mcg *mcg;
 	struct rxe_mca *mca, *tmp;
+	int n;
 
 	mcg = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
 	if (!mcg)
@@ -145,8 +146,8 @@ static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
 			list_del(&mca->qp_list);
-			mcg->num_qp--;
-			if (mcg->num_qp <= 0)
+			n = atomic_dec_return(&mcg->qp_num);
+			if (n <= 0)
 				rxe_drop_ref(mcg);
 			atomic_dec(&qp->mcg_num);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 10020103ea4a..41571c6b7d98 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -229,6 +229,11 @@ static inline void rxe_rcv_pkt(struct sk_buff *skb)
 		rxe_comp_queue_pkt(RXECB(skb)->qp, skb);
 }
 
+/* split processing of the qp list into two stages.
+ * first just make a simple linear array from the
+ * current list while holding the lock and then
+ * process each qp without holding the lock.
+ */
 static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
 {
 	struct sk_buff *s;
@@ -237,7 +242,9 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
 	struct rxe_mcg *mcg;
 	struct rxe_mca *mca;
 	struct rxe_qp *qp;
+	struct rxe_qp **qp_array;
 	union ib_gid dgid;
+	int n, nmax;
 	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
@@ -251,15 +258,31 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
 	if (!mcg)
 		goto drop;	/* mcast group not registered */
 
+	/* this is the current number of qp's attached to mcg plus a
+	 * little room in case new qp's are attached. It isn't wrong
+	 * to miss some qp's since it is just a matter of precisely
+	 * when the packet is assumed to be received.
+	 */
+	nmax = atomic_read(&mcg->qp_num) + 2;
+	qp_array = kmalloc_array(nmax, sizeof(qp), GFP_KERNEL);
+
+	n = 0;
 	spin_lock_bh(&mcg->mcg_lock);
+	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+		qp_array[n++] = mca->qp;
+		if (n == nmax)
+			break;
+	}
+	spin_unlock_bh(&mcg->mcg_lock);
+	nmax = n;
 
 	/* this is unreliable datagram service so we let
 	 * failures to deliver a multicast packet to a
 	 * single QP happen and just move on and try
 	 * the rest of them on the list
 	 */
-	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
-		qp = mca->qp;
+	for (n = 0; n < nmax; n++) {
+		qp = qp_array[n];
 
 		/* validate qp for incoming packet */
 		err = check_type_state(rxe, pkt, qp);
@@ -274,8 +297,8 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
 		 * skb and pass to the QP. Pass the original skb to
 		 * the last QP in the list.
 		 */
-		if (mca->qp_list.next != &mcg->qp_list) {
-			s = skb_clone(skb, GFP_ATOMIC);
+		if (n < nmax - 1) {
+			s = skb_clone(skb, GFP_KERNEL);
 			if (unlikely(!s))
 				continue;
 
@@ -295,7 +318,7 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
 		}
 	}
 
-	spin_unlock_bh(&mcg->mcg_lock);
+	kfree(qp_array);
 
 	rxe_drop_ref(mcg);	/* drop ref from rxe_pool_get_key. */
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 02745d51c163..d65c358798c6 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -356,8 +356,8 @@ struct rxe_mcg {
 	spinlock_t		mcg_lock; /* guard group */
 	struct rxe_dev		*rxe;
 	struct list_head	qp_list;
+	atomic_t		qp_num;
 	union ib_gid		mgid;
-	int			num_qp;
 	u32			qkey;
 	u16			pkey;
 };
-- 
2.32.0


  parent reply	other threads:[~2022-01-27 21:38 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-27 21:37 [RFC PATCH v9 00/26] Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 01/26] RDMA/rxe: Move rxe_mcast_add/delete to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 02/26] RDMA/rxe: Move rxe_mcast_attach/detach " Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 03/26] RDMA/rxe: Rename rxe_mc_grp and rxe_mc_elem Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 04/26] RDMA/rxe: Enforce IBA o10-2.2.3 Bob Pearson
2022-01-28 12:53   ` Jason Gunthorpe
2022-01-28 16:18     ` Bob Pearson
2022-01-28 16:42       ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 05/26] RDMA/rxe: Remove rxe_drop_all_macst_groups Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 06/26] RDMA/rxe: Remove qp->grp_lock and qp->grp_list Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 07/26] RDMA/rxe: Use kzmalloc/kfree for mca Bob Pearson
2022-01-28 18:00   ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 08/26] RDMA/rxe: Rename grp to mcg and mce to mca Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 09/26] RDMA/rxe: Introduce RXECB(skb) Bob Pearson
2022-01-28 18:29   ` Jason Gunthorpe
2022-01-30 17:47     ` Bob Pearson
2022-01-27 21:37 ` Bob Pearson [this message]
2022-01-27 21:37 ` [RFC PATCH v9 11/26] RDMA/rxe: Replace locks by rxe->mcg_lock Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 12/26] RDMA/rxe: Replace pool key by rxe->mcg_tree Bob Pearson
2022-01-28 18:32   ` Jason Gunthorpe
2022-01-30 23:23     ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 13/26] RDMA/rxe: Remove key'ed object support Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 14/26] RDMA/rxe: Remove mcg from rxe pools Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 15/26] RDMA/rxe: Add code to cleanup mcast memory Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 16/26] RDMA/rxe: Add comments to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 17/26] RDMA/rxe: Separate code into subroutines Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 18/26] RDMA/rxe: Convert mca read locking to RCU Bob Pearson
2022-01-28 18:39   ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 19/26] RDMA/rxe: Reverse the sense of RXE_POOL_NO_ALLOC Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 20/26] RDMA/rxe: Delete _locked() APIs for pool objects Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 21/26] RDMA/rxe: Replace obj by elem in declaration Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 22/26] RDMA/rxe: Replace red-black trees by xarrays Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 23/26] RDMA/rxe: Change pool locking to RCU Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 24/26] RDMA/rxe: Add wait_for_completion to pool objects Bob Pearson
2022-01-28  3:58   ` kernel test robot
2022-01-28  3:58     ` kernel test robot
2022-01-27 21:37 ` [RFC PATCH v9 25/26] RDMA/rxe: Fix ref error in rxe_av.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 26/26] RDMA/rxe: Replace mr by rkey in responder resources Bob Pearson
2022-01-28 18:42 ` [RFC PATCH v9 00/26] Jason Gunthorpe
2022-02-07 19:20   ` Bob Pearson
2022-02-07 19:38     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220127213755.31697-11-rpearsonhpe@gmail.com \
    --to=rpearsonhpe@gmail.com \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.