All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Mike Marciniszyn
	<mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
	Sebastian Sanchez
	<sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 07/27] IB/rdmavt: Use per-CPU reference count for MRs
Date: Wed, 08 Feb 2017 05:26:31 -0800	[thread overview]
Message-ID: <20170208132630.16442.88555.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20170208132142.16442.69329.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>

From: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Having per-CPU reference count for each MR prevents
cache-line bouncing across the system. Thus, it
prevents bottlenecks. Use per-CPU reference counts
per MR.

The per-CPU reference count for FMRs is used in
atomic mode to allow accurate testing of the busy
state. Other MR types run in per-CPU mode MR until
they're freed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/sw/rdmavt/mr.c |   59 ++++++++++++++++++++++++-------------
 include/rdma/rdmavt_mr.h          |   10 +++---
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 52fd152..c80a69b 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -120,10 +120,19 @@ static void rvt_deinit_mregion(struct rvt_mregion *mr)
 	mr->mapsz = 0;
 	while (i)
 		kfree(mr->map[--i]);
+	percpu_ref_exit(&mr->refcount);
+}
+
+static void __rvt_mregion_complete(struct percpu_ref *ref)
+{
+	struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
+					      refcount);
+
+	complete(&mr->comp);
 }
 
 static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
-			    int count)
+			    int count, unsigned int percpu_flags)
 {
 	int m, i = 0;
 	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
@@ -133,19 +142,23 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
 	for (; i < m; i++) {
 		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
 					  dev->dparms.node);
-		if (!mr->map[i]) {
-			rvt_deinit_mregion(mr);
-			return -ENOMEM;
-		}
+		if (!mr->map[i])
+			goto bail;
 		mr->mapsz++;
 	}
 	init_completion(&mr->comp);
 	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
+	if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
+			    percpu_flags, GFP_KERNEL))
+		goto bail;
+
 	atomic_set(&mr->lkey_invalid, 0);
 	mr->pd = pd;
 	mr->max_segs = count;
 	return 0;
+bail:
+	rvt_deinit_mregion(mr);
+	return -ENOMEM;
 }
 
 /**
@@ -180,8 +193,7 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 		if (!tmr) {
 			rcu_assign_pointer(dev->dma_mr, mr);
 			mr->lkey_published = 1;
-		} else {
-			rvt_put_mr(mr);
+			rvt_get_mr(mr);
 		}
 		goto success;
 	}
@@ -239,11 +251,14 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
 	int freed = 0;
 
 	spin_lock_irqsave(&rkt->lock, flags);
-	if (!mr->lkey_published)
-		goto out;
-	if (lkey == 0) {
-		RCU_INIT_POINTER(dev->dma_mr, NULL);
+	if (!lkey) {
+		if (mr->lkey_published) {
+			RCU_INIT_POINTER(dev->dma_mr, NULL);
+			rvt_put_mr(mr);
+		}
 	} else {
+		if (!mr->lkey_published)
+			goto out;
 		r = lkey >> (32 - dev->dparms.lkey_table_size);
 		RCU_INIT_POINTER(rkt->table[r], NULL);
 	}
@@ -253,7 +268,7 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
 	spin_unlock_irqrestore(&rkt->lock, flags);
 	if (freed) {
 		synchronize_rcu();
-		rvt_put_mr(mr);
+		percpu_ref_kill(&mr->refcount);
 	}
 }
 
@@ -269,7 +284,7 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
 	if (!mr)
 		goto bail;
 
-	rval = rvt_init_mregion(&mr->mr, pd, count);
+	rval = rvt_init_mregion(&mr->mr, pd, count, 0);
 	if (rval)
 		goto bail;
 	/*
@@ -294,8 +309,8 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
 
 static void __rvt_free_mr(struct rvt_mr *mr)
 {
-	rvt_deinit_mregion(&mr->mr);
 	rvt_free_lkey(&mr->mr);
+	rvt_deinit_mregion(&mr->mr);
 	kfree(mr);
 }
 
@@ -323,7 +338,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
 		goto bail;
 	}
 
-	rval = rvt_init_mregion(&mr->mr, pd, 0);
+	rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
 	if (rval) {
 		ret = ERR_PTR(rval);
 		goto bail;
@@ -445,8 +460,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr)
 	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
 	if (!timeout) {
 		rvt_pr_err(rdi,
-			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
-			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+			   "rvt_dereg_mr timeout mr %p pd %p\n",
+			   mr, mr->mr.pd);
 		rvt_get_mr(&mr->mr);
 		ret = -EBUSY;
 		goto out;
@@ -623,7 +638,8 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
 	if (!fmr)
 		goto bail;
 
-	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
+				PERCPU_REF_INIT_ATOMIC);
 	if (rval)
 		goto bail;
 
@@ -674,11 +690,12 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 	struct rvt_fmr *fmr = to_ifmr(ibfmr);
 	struct rvt_lkey_table *rkt;
 	unsigned long flags;
-	int m, n, i;
+	int m, n;
+	unsigned long i;
 	u32 ps;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
 
-	i = atomic_read(&fmr->mr.refcount);
+	i = atomic_long_read(&fmr->mr.refcount.count);
 	if (i > 2)
 		return -EBUSY;
 
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
index de59de2..05698d8 100644
--- a/include/rdma/rdmavt_mr.h
+++ b/include/rdma/rdmavt_mr.h
@@ -52,6 +52,7 @@
  * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once
  * drivers no longer need access to the MR directly.
  */
+#include <linux/percpu-refcount.h>
 
 /*
  * A segment is a linear region of low physical memory.
@@ -79,11 +80,11 @@ struct rvt_mregion {
 	int access_flags;
 	u32 max_segs;           /* number of rvt_segs in all the arrays */
 	u32 mapsz;              /* size of the map array */
+	atomic_t lkey_invalid;	/* true if current lkey is invalid */
 	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
 	u8  lkey_published;     /* in global table */
-	atomic_t lkey_invalid;	/* true if current lkey is invalid */
+	struct percpu_ref refcount;
 	struct completion comp; /* complete when refcount goes to zero */
-	atomic_t refcount;
 	struct rvt_segarray *map[0];    /* the segments */
 };
 
@@ -123,13 +124,12 @@ struct rvt_sge_state {
 
 static inline void rvt_put_mr(struct rvt_mregion *mr)
 {
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		complete(&mr->comp);
+	percpu_ref_put(&mr->refcount);
 }
 
 static inline void rvt_get_mr(struct rvt_mregion *mr)
 {
-	atomic_inc(&mr->refcount);
+	percpu_ref_get(&mr->refcount);
 }
 
 static inline void rvt_put_ss(struct rvt_sge_state *ss)

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-02-08 13:26 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-08 13:25 [PATCH 00/27] IB/hfi1,qib,rdmavt: Patches for 4.11 Dennis Dalessandro
     [not found] ` <20170208132142.16442.69329.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-08 13:25   ` [PATCH 01/27] IB/hfi1: Correct defered count after processing qp_wait_list Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 02/27] IB/hfi1: Process qp wait list in IRQ thread periodically Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 03/27] IB/hfi1: Ensure read of producer s_head is correct Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 04/27] IB/hfi1: Use static CTLE with Preset 6 for integrated HFIs Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 05/27] IB/hfi1: Correct error calldown locking Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 06/27] IB/hfi1: Access hfi1_ibport through rcd pointer Dennis Dalessandro
2017-02-08 13:26   ` Dennis Dalessandro [this message]
2017-02-08 13:26   ` [PATCH 08/27] IB/hfi1: Allocate context data on memory node Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 09/27] IB/hfi1: Add additional fields to qp_stats Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 10/27] IB/hfi1: Reduce oversized fields in struct hfi1_packet Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 11/27] IB/hfi1: Check upper-case EFI variables Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 12/27] IB/hfi1, qib, rdmavt: Move two IB event functions into rdmavt Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 13/27] IB/hfi1, qib, rdmavt: Move AETH credit " Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 14/27] IB/rdmavt: Adding timer logic to rdmavt Dennis Dalessandro
     [not found]     ` <20170208132712.16442.57028.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:33       ` Leon Romanovsky
2017-02-08 13:27   ` [PATCH 15/27] IB/hfi1: Use new rdmavt timers Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 16/27] IB/qib: " Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 17/27] IB/hfi1, rdmavt: Update copy_sge to use boolean arguments Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 18/27] IB/hfi1, rdmavt: Move SGE state helper routines into rdmavt Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 19/27] IB/qib: Updates to use rdmavt's SGE helper routines Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 20/27] IB/rdmavt, IB/hfi1, IB/qib: Correct ack count for passive (RTR) QPs Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 21/27] IB/hfi1: Modify logging frequency of DCC errors Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 22/27] IB/hfi1: Add receive fault injection feature Dennis Dalessandro
     [not found]     ` <20170208132800.16442.94549.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:42       ` Leon Romanovsky
     [not found]         ` <20170212174205.GI14015-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-02-14 21:51           ` Doug Ledford
     [not found]             ` <1487109065.86943.86.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-02-28 17:55               ` Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 23/27] IB/hfi1: Add transmit " Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 24/27] IB/hfi1: Do not set physical link state if DC is in the shutdown state Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 25/27] IB/hfi1: Add rvt_rnr_tbl_to_usec function Dennis Dalessandro
     [not found]     ` <20170208132818.16442.38634.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:48       ` Leon Romanovsky
2017-02-08 13:28   ` [PATCH 26/27] IB/hfi1, qib, rdmavt: Move AETH defines to rdma/ib_hdrs.h Dennis Dalessandro
     [not found]     ` <20170208132824.16442.61753.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:47       ` Leon Romanovsky
2017-02-08 13:28   ` [PATCH 27/27] IB/hfi1: Code reuse with memdup_copy Dennis Dalessandro
2017-02-19 13:47   ` [PATCH 00/27] IB/hfi1,qib,rdmavt: Patches for 4.11 Doug Ledford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170208132630.16442.88555.stgit@scvm10.sc.intel.com \
    --to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.