linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@ziepe.ca>
To: linux-mm@kvack.org, Jerome Glisse <jglisse@redhat.com>,
	Ralph Campbell <rcampbell@nvidia.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Felix.Kuehling@amd.com
Cc: linux-rdma@vger.kernel.org, dri-devel@lists.freedesktop.org,
	amd-gfx@lists.freedesktop.org,
	"Alex Deucher" <alexander.deucher@amd.com>,
	"Ben Skeggs" <bskeggs@redhat.com>,
	"Boris Ostrovsky" <boris.ostrovsky@oracle.com>,
	"Christian König" <christian.koenig@amd.com>,
	"David Zhou" <David1.Zhou@amd.com>,
	"Dennis Dalessandro" <dennis.dalessandro@intel.com>,
	"Juergen Gross" <jgross@suse.com>,
	"Mike Marciniszyn" <mike.marciniszyn@intel.com>,
	"Oleksandr Andrushchenko" <oleksandr_andrushchenko@epam.com>,
	"Petr Cvek" <petrcvekcz@gmail.com>,
	"Stefano Stabellini" <sstabellini@kernel.org>,
	nouveau@lists.freedesktop.org, xen-devel@lists.xenproject.org,
	"Christoph Hellwig" <hch@infradead.org>,
	"Jason Gunthorpe" <jgg@mellanox.com>
Subject: [PATCH v2 06/15] RDMA/hfi1: Use mmu_range_notifier_inset for user_exp_rcv
Date: Mon, 28 Oct 2019 17:10:23 -0300	[thread overview]
Message-ID: <20191028201032.6352-7-jgg@ziepe.ca> (raw)
In-Reply-To: <20191028201032.6352-1-jgg@ziepe.ca>

From: Jason Gunthorpe <jgg@mellanox.com>

This converts one of the two users of mmu_notifiers to use the new API.
The conversion is fairly straightforward, however the existing use of
notifiers here seems to be racey.

Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
Cc: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hfi1/file_ops.c     |   2 +-
 drivers/infiniband/hw/hfi1/hfi.h          |   2 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c | 146 +++++++++-------------
 drivers/infiniband/hw/hfi1/user_exp_rcv.h |   3 +-
 4 files changed, 60 insertions(+), 93 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f9a7e9d29c8ba2..7c5e3fb224139a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1138,7 +1138,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
 			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
 			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
 	/* adjust flag if this fd is not able to cache */
-	if (!fd->handler)
+	if (!fd->use_mn)
 		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
 
 	cinfo.num_active = hfi1_count_active_units();
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index fa45350a9a1d32..fc10d65fc3e13c 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1444,7 +1444,7 @@ struct hfi1_filedata {
 	/* for cpu affinity; -1 if none */
 	int rec_cpu_num;
 	u32 tid_n_pinned;
-	struct mmu_rb_handler *handler;
+	bool use_mn;
 	struct tid_rb_node **entry_to_rb;
 	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
 	u32 tid_limit;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 3592a9ec155e85..a1ab3bd334f89e 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -59,11 +59,11 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
 			      struct tid_user_buf *tbuf,
 			      u32 rcventry, struct tid_group *grp,
 			      u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 				    struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_range_notifier *mrn,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
 			    struct tid_group *grp,
 			    unsigned int start, u16 count,
@@ -73,10 +73,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
 			      struct tid_group **grp);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-	.insert = tid_rb_insert,
-	.remove = tid_rb_remove,
-	.invalidate = tid_rb_invalidate
+static const struct mmu_range_notifier_ops tid_mn_ops = {
+	.invalidate = tid_rb_invalidate,
 };
 
 /*
@@ -87,7 +85,6 @@ static struct mmu_rb_ops tid_rb_ops = {
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 			   struct hfi1_ctxtdata *uctxt)
 {
-	struct hfi1_devdata *dd = uctxt->dd;
 	int ret = 0;
 
 	spin_lock_init(&fd->tid_lock);
@@ -109,20 +106,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 			fd->entry_to_rb = NULL;
 			return -ENOMEM;
 		}
-
-		/*
-		 * Register MMU notifier callbacks. If the registration
-		 * fails, continue without TID caching for this context.
-		 */
-		ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
-					   dd->pport->hfi1_wq,
-					   &fd->handler);
-		if (ret) {
-			dd_dev_info(dd,
-				    "Failed MMU notifier registration %d\n",
-				    ret);
-			ret = 0;
-		}
+		fd->use_mn = true;
 	}
 
 	/*
@@ -139,7 +123,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 	 * init.
 	 */
 	spin_lock(&fd->tid_lock);
-	if (uctxt->subctxt_cnt && fd->handler) {
+	if (uctxt->subctxt_cnt && fd->use_mn) {
 		u16 remainder;
 
 		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -158,18 +142,10 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
-	/*
-	 * The notifier would have been removed when the process'es mm
-	 * was freed.
-	 */
-	if (fd->handler) {
-		hfi1_mmu_rb_unregister(fd->handler);
-	} else {
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-	}
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
 
 	kfree(fd->invalid_tids);
 	fd->invalid_tids = NULL;
@@ -201,7 +177,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
 
 	if (mapped) {
 		pci_unmap_single(dd->pcidev, node->dma_addr,
-				 node->mmu.len, PCI_DMA_FROMDEVICE);
+				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
 		pages = &node->pages[idx];
 	} else {
 		pages = &tidbuf->pages[idx];
@@ -777,8 +753,8 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
 		return -EFAULT;
 	}
 
-	node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
-	node->mmu.len = npages * PAGE_SIZE;
+	node->notifier.ops = &tid_mn_ops;
+	node->fdata = fd;
 	node->phys = page_to_phys(pages[0]);
 	node->npages = npages;
 	node->rcventry = rcventry;
@@ -787,23 +763,34 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
 	node->freed = false;
 	memcpy(node->pages, pages, sizeof(struct page *) * npages);
 
-	if (!fd->handler)
-		ret = tid_rb_insert(fd, &node->mmu);
-	else
-		ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
-	if (ret) {
-		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
-			  node->rcventry, node->mmu.addr, node->phys, ret);
-		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-				 PCI_DMA_FROMDEVICE);
-		kfree(node);
-		return -EFAULT;
+	if (fd->use_mn) {
+		ret = mmu_range_notifier_insert(
+			&node->notifier, tbuf->vaddr + (pageidx * PAGE_SIZE),
+			npages * PAGE_SIZE, fd->mm);
+		if (ret)
+			goto out_unmap;
+		/*
+		 * FIXME: This is in the wrong order, the notifier should be
+		 * established before the pages are pinned by pin_rcv_pages.
+		 */
+		mmu_range_read_begin(&node->notifier);
 	}
+	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
 	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
 	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
-			       node->mmu.addr, node->phys, phys);
+			       node->notifier.interval_tree.start, node->phys,
+			       phys);
 	return 0;
+
+out_unmap:
+	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+		  node->rcventry, node->notifier.interval_tree.start,
+		  node->phys, ret);
+	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+			 PCI_DMA_FROMDEVICE);
+	kfree(node);
+	return -EFAULT;
 }
 
 static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
@@ -833,10 +820,9 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
 	if (grp)
 		*grp = node->grp;
 
-	if (!fd->handler)
-		cacheless_tid_rb_remove(fd, node);
-	else
-		hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+	if (fd->use_mn)
+		mmu_range_notifier_remove(&node->notifier);
+	cacheless_tid_rb_remove(fd, node);
 
 	return 0;
 }
@@ -847,7 +833,8 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
 	struct hfi1_devdata *dd = uctxt->dd;
 
 	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
-				 node->npages, node->mmu.addr, node->phys,
+				 node->npages,
+				 node->notifier.interval_tree.start, node->phys,
 				 node->dma_addr);
 
 	/*
@@ -894,30 +881,29 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
 				if (!node || node->rcventry != rcventry)
 					continue;
 
+				if (fd->use_mn)
+					mmu_range_notifier_remove(
+						&node->notifier);
 				cacheless_tid_rb_remove(fd, node);
 			}
 		}
 	}
 }
 
-/*
- * Always return 0 from this function.  A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM.  Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away.  PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_range_notifier *mrn,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq)
 {
-	struct hfi1_filedata *fdata = arg;
-	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 	struct tid_rb_node *node =
-		container_of(mnode, struct tid_rb_node, mmu);
+		container_of(mrn, struct tid_rb_node, notifier);
+	struct hfi1_filedata *fdata = node->fdata;
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 
 	if (node->freed)
-		return 0;
+		return true;
 
-	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+				 node->notifier.interval_tree.start,
 				 node->rcventry, node->npages, node->dma_addr);
 	node->freed = true;
 
@@ -946,18 +932,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
 		fdata->invalid_tid_idx++;
 	}
 	spin_unlock(&fdata->invalid_lock);
-	return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-	u32 base = fdata->uctxt->expected_base;
-
-	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
-	return 0;
+	return true;
 }
 
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -968,12 +943,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
 	clear_tid_node(fdata, tnode);
 }
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-
-	cacheless_tid_rb_remove(fdata, tnode);
-}
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 43b105de1d5427..b5314db083b125 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -65,7 +65,8 @@ struct tid_user_buf {
 };
 
 struct tid_rb_node {
-	struct mmu_rb_node mmu;
+	struct mmu_range_notifier notifier;
+	struct hfi1_filedata *fdata;
 	unsigned long phys;
 	struct tid_group *grp;
 	u32 rcventry;
-- 
2.23.0


  parent reply	other threads:[~2019-10-28 20:10 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-28 20:10 [PATCH v2 00/15] Consolidate the mmu notifier interval_tree and locking Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 01/15] mm/mmu_notifier: define the header pre-processor parts even if disabled Jason Gunthorpe
2019-11-05 21:23   ` John Hubbard
2019-11-06 13:36     ` Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 02/15] mm/mmu_notifier: add an interval tree notifier Jason Gunthorpe
2019-10-29 22:04   ` Kuehling, Felix
2019-10-29 22:56     ` Jason Gunthorpe
2019-11-07  0:23   ` John Hubbard
2019-11-07  2:08     ` Jerome Glisse
2019-11-07 20:11       ` Jason Gunthorpe
2019-11-07 21:04         ` Jerome Glisse
2019-11-08  0:32           ` Jason Gunthorpe
2019-11-08  2:00             ` Jerome Glisse
2019-11-08 20:19               ` Jason Gunthorpe
2019-11-07 20:06     ` Jason Gunthorpe
2019-11-07 20:53       ` John Hubbard
2019-11-08 15:26         ` Jason Gunthorpe
2019-11-08  6:33       ` Christoph Hellwig
2019-11-08 13:43         ` Jerome Glisse
2019-10-28 20:10 ` [PATCH v2 03/15] mm/hmm: allow hmm_range to be used with a mmu_range_notifier or hmm_mirror Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 04/15] mm/hmm: define the pre-processor related parts of hmm.h even if disabled Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 05/15] RDMA/odp: Use mmu_range_notifier_insert() Jason Gunthorpe
2019-10-28 20:10 ` Jason Gunthorpe [this message]
2019-10-29 12:19   ` [PATCH v2 06/15] RDMA/hfi1: Use mmu_range_notifier_inset for user_exp_rcv Dennis Dalessandro
2019-10-29 12:51     ` Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 07/15] drm/radeon: use mmu_range_notifier_insert Jason Gunthorpe
2019-10-29  7:48   ` Koenig, Christian
2019-10-28 20:10 ` [PATCH v2 08/15] xen/gntdev: Use select for DMA_SHARED_BUFFER Jason Gunthorpe
2019-11-01 18:26   ` Jason Gunthorpe
2019-11-05 14:44     ` Jürgen Groß
2019-11-07  9:39   ` Jürgen Groß
2019-10-28 20:10 ` [PATCH v2 09/15] xen/gntdev: use mmu_range_notifier_insert Jason Gunthorpe
2019-10-30 16:55   ` Boris Ostrovsky
2019-11-01 17:48     ` Jason Gunthorpe
2019-11-01 18:51       ` Boris Ostrovsky
2019-11-01 19:17         ` Jason Gunthorpe
2019-11-04 22:03   ` Boris Ostrovsky
2019-11-05  2:31     ` Jason Gunthorpe
2019-11-05 15:16       ` Boris Ostrovsky
2019-11-07 20:36         ` Jason Gunthorpe
2019-11-07 22:54           ` Boris Ostrovsky
2019-11-08 14:53             ` Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 10/15] nouveau: use mmu_notifier directly for invalidate_range_start Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 11/15] nouveau: use mmu_range_notifier instead of hmm_mirror Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 12/15] drm/amdgpu: Call find_vma under mmap_sem Jason Gunthorpe
2019-10-29  7:49   ` Koenig, Christian
2019-10-29 16:28   ` Kuehling, Felix
2019-10-29 13:07     ` Christian König
2019-10-29 17:19     ` Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 13/15] drm/amdgpu: Use mmu_range_insert instead of hmm_mirror Jason Gunthorpe
2019-10-29  7:51   ` Koenig, Christian
2019-10-29 13:59     ` Jason Gunthorpe
2019-10-29 22:14   ` Kuehling, Felix
2019-10-29 23:09     ` Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 14/15] drm/amdgpu: Use mmu_range_notifier " Jason Gunthorpe
2019-10-29 19:22   ` Yang, Philip
2019-10-29 19:25     ` Jason Gunthorpe
2019-11-01 14:44       ` Yang, Philip
2019-11-01 15:12         ` Jason Gunthorpe
2019-11-01 15:59           ` Yang, Philip
2019-11-01 17:42             ` Jason Gunthorpe
2019-11-01 19:19               ` Jason Gunthorpe
2019-11-01 19:45               ` Yang, Philip
2019-11-01 19:50                 ` Yang, Philip
2019-11-01 19:51                 ` Jason Gunthorpe
2019-11-01 18:21         ` Jason Gunthorpe
2019-11-01 18:34         ` [PATCH v2a " Jason Gunthorpe
2019-10-28 20:10 ` [PATCH v2 15/15] mm/hmm: remove hmm_mirror and related Jason Gunthorpe
2019-11-01 19:54 ` [PATCH v2 00/15] Consolidate the mmu notifier interval_tree and locking Jason Gunthorpe
2019-11-01 20:54 ` Ralph Campbell
2019-11-04 20:40   ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191028201032.6352-7-jgg@ziepe.ca \
    --to=jgg@ziepe.ca \
    --cc=David1.Zhou@amd.com \
    --cc=Felix.Kuehling@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bskeggs@redhat.com \
    --cc=christian.koenig@amd.com \
    --cc=dennis.dalessandro@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=hch@infradead.org \
    --cc=jgg@mellanox.com \
    --cc=jglisse@redhat.com \
    --cc=jgross@suse.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-mm@kvack.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mike.marciniszyn@intel.com \
    --cc=nouveau@lists.freedesktop.org \
    --cc=oleksandr_andrushchenko@epam.com \
    --cc=petrcvekcz@gmail.com \
    --cc=rcampbell@nvidia.com \
    --cc=sstabellini@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).