linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ira.weiny@intel.com
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>,
	Dan Williams <dan.j.williams@intel.com>,
	Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	"Theodore Ts'o" <tytso@mit.edu>,
	John Hubbard <jhubbard@nvidia.com>,
	Michal Hocko <mhocko@suse.com>,
	Dave Chinner <david@fromorbit.com>,
	linux-xfs@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org,
	linux-mm@kvack.org, Ira Weiny <ira.weiny@intel.com>
Subject: [RFC PATCH v2 17/19] RDMA/umem: Convert to vaddr_[pin|unpin]* operations.
Date: Fri,  9 Aug 2019 15:58:31 -0700	[thread overview]
Message-ID: <20190809225833.6657-18-ira.weiny@intel.com> (raw)
In-Reply-To: <20190809225833.6657-1-ira.weiny@intel.com>

From: Ira Weiny <ira.weiny@intel.com>

In order to properly track the pinning information we need to keep a
vaddr_pin object around.  Store that within the umem object directly.

The vaddr_pin object allows the GUP code to associate any files it pins
with the RDMA file descriptor associated with this GUP.

Furthermore, use the vaddr_pin object to store the owning mm while we
are at it.

No references need to be taken on the owing file as the lifetime of that
object is tied to all the umems being destroyed first.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/core/umem.c     | 26 +++++++++++++++++---------
 drivers/infiniband/core/umem_odp.c | 16 ++++++++--------
 include/rdma/ib_umem.h             |  2 +-
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 965cf9dea71a..a9ce3e3816ef 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,7 +54,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
 	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
 		page = sg_page_iter_page(&sg_iter);
-		put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
+		vaddr_unpin_pages_dirty_lock(&page, 1, &umem->vaddr_pin,
+					     umem->writable && dirty);
 	}
 
 	sg_free_table(&umem->sg_head);
@@ -243,8 +244,15 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	umem->length     = size;
 	umem->address    = addr;
 	umem->writable   = ib_access_writable(access);
-	umem->owning_mm = mm = current->mm;
-	mmgrab(mm);
+	umem->vaddr_pin.mm = mm = current->mm;
+	mmgrab(umem->vaddr_pin.mm);
+
+	/* No need to get a reference to the core file object here.  The key is
+	 * that sys_file reference is held by the ufile.  Any duplication of
+	 * sys_file by the core will keep references active until all those
+	 * contexts are closed out.  No matter which process hold them open.
+	 */
+	umem->vaddr_pin.f_owner = context->ufile->sys_file;
 
 	if (access & IB_ACCESS_ON_DEMAND) {
 		if (WARN_ON_ONCE(!context->invalidate_range)) {
@@ -292,11 +300,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 
 	while (npages) {
 		down_read(&mm->mmap_sem);
-		ret = get_user_pages(cur_base,
+		ret = vaddr_pin_pages(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
-				     gup_flags | FOLL_LONGTERM,
-				     page_list, NULL);
+				     gup_flags,
+				     page_list, &umem->vaddr_pin);
 		if (ret < 0) {
 			up_read(&mm->mmap_sem);
 			goto umem_release;
@@ -336,7 +344,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	free_page((unsigned long) page_list);
 umem_kfree:
 	if (ret) {
-		mmdrop(umem->owning_mm);
+		mmdrop(umem->vaddr_pin.mm);
 		kfree(umem);
 	}
 	return ret ? ERR_PTR(ret) : umem;
@@ -345,7 +353,7 @@ EXPORT_SYMBOL(ib_umem_get);
 
 static void __ib_umem_release_tail(struct ib_umem *umem)
 {
-	mmdrop(umem->owning_mm);
+	mmdrop(umem->vaddr_pin.mm);
 	if (umem->is_odp)
 		kfree(to_ib_umem_odp(umem));
 	else
@@ -369,7 +377,7 @@ void ib_umem_release(struct ib_umem *umem)
 
 	__ib_umem_release(umem->context->device, umem, 1);
 
-	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
+	atomic64_sub(ib_umem_num_pages(umem), &umem->vaddr_pin.mm->pinned_vm);
 	__ib_umem_release_tail(umem);
 }
 EXPORT_SYMBOL(ib_umem_release);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 2a75c6f8d827..53085896d718 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -278,11 +278,11 @@ static int get_per_mm(struct ib_umem_odp *umem_odp)
 	 */
 	mutex_lock(&ctx->per_mm_list_lock);
 	list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
-		if (per_mm->mm == umem_odp->umem.owning_mm)
+		if (per_mm->mm == umem_odp->umem.vaddr_pin.mm)
 			goto found;
 	}
 
-	per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
+	per_mm = alloc_per_mm(ctx, umem_odp->umem.vaddr_pin.mm);
 	if (IS_ERR(per_mm)) {
 		mutex_unlock(&ctx->per_mm_list_lock);
 		return PTR_ERR(per_mm);
@@ -355,8 +355,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 	umem->writable   = root->umem.writable;
 	umem->is_odp = 1;
 	odp_data->per_mm = per_mm;
-	umem->owning_mm  = per_mm->mm;
-	mmgrab(umem->owning_mm);
+	umem->vaddr_pin.mm  = per_mm->mm;
+	mmgrab(umem->vaddr_pin.mm);
 
 	mutex_init(&odp_data->umem_mutex);
 	init_completion(&odp_data->notifier_completion);
@@ -389,7 +389,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 out_page_list:
 	vfree(odp_data->page_list);
 out_odp_data:
-	mmdrop(umem->owning_mm);
+	mmdrop(umem->vaddr_pin.mm);
 	kfree(odp_data);
 	return ERR_PTR(ret);
 }
@@ -399,10 +399,10 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 {
 	struct ib_umem *umem = &umem_odp->umem;
 	/*
-	 * NOTE: This must called in a process context where umem->owning_mm
+	 * NOTE: This must called in a process context where umem->vaddr_pin.mm
 	 * == current->mm
 	 */
-	struct mm_struct *mm = umem->owning_mm;
+	struct mm_struct *mm = umem->vaddr_pin.mm;
 	int ret_val;
 
 	umem_odp->page_shift = PAGE_SHIFT;
@@ -581,7 +581,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 			      unsigned long current_seq)
 {
 	struct task_struct *owning_process  = NULL;
-	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
+	struct mm_struct *owning_mm = umem_odp->umem.vaddr_pin.mm;
 	struct page       **local_page_list = NULL;
 	u64 page_mask, off;
 	int j, k, ret = 0, start_idx, npages = 0;
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 1052d0d62be7..ab677c799e29 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -43,7 +43,6 @@ struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext     *context;
-	struct mm_struct       *owning_mm;
 	size_t			length;
 	unsigned long		address;
 	u32 writable : 1;
@@ -52,6 +51,7 @@ struct ib_umem {
 	struct sg_table sg_head;
 	int             nmap;
 	unsigned int    sg_nents;
+	struct vaddr_pin vaddr_pin;
 };
 
 /* Returns the offset of the umem start relative to the first page. */
-- 
2.20.1


  parent reply	other threads:[~2019-08-09 22:59 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-09 22:58 [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 01/19] fs/locks: Export F_LAYOUT lease to user space ira.weiny
2019-08-09 23:52   ` Dave Chinner
2019-08-12 17:36     ` Ira Weiny
2019-08-14  8:05       ` Dave Chinner
2019-08-14 11:21         ` Jeff Layton
2019-08-14 11:38           ` Dave Chinner
2019-08-09 22:58 ` [RFC PATCH v2 02/19] fs/locks: Add Exclusive flag to user Layout lease ira.weiny
2019-08-14 14:15   ` Jeff Layton
2019-08-14 21:56     ` Dave Chinner
2019-08-26 10:41       ` Jeff Layton
2019-08-29 23:34         ` Ira Weiny
2019-09-04 12:52           ` Jeff Layton
2019-09-04 23:12   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 03/19] mm/gup: Pass flags down to __gup_device_huge* calls ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 04/19] mm/gup: Ensure F_LAYOUT lease is held prior to GUP'ing pages ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 05/19] fs/ext4: Teach ext4 to break layout leases ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 06/19] fs/ext4: Teach dax_layout_busy_page() to operate on a sub-range ira.weiny
2019-08-23 15:18   ` Vivek Goyal
2019-08-29 18:52     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 07/19] fs/xfs: Teach xfs to use new dax_layout_busy_page() ira.weiny
2019-08-09 23:30   ` Dave Chinner
2019-08-12 18:05     ` Ira Weiny
2019-08-14  8:04       ` Dave Chinner
2019-08-09 22:58 ` [RFC PATCH v2 08/19] fs/xfs: Fail truncate if page lease can't be broken ira.weiny
2019-08-09 23:22   ` Dave Chinner
2019-08-12 18:08     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 09/19] mm/gup: Introduce vaddr_pin structure ira.weiny
2019-08-10  0:06   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 10/19] mm/gup: Pass a NULL vaddr_pin through GUP fast ira.weiny
2019-08-10  0:06   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 11/19] mm/gup: Pass follow_page_context further down the call stack ira.weiny
2019-08-10  0:18   ` John Hubbard
2019-08-12 19:01     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 12/19] mm/gup: Prep put_user_pages() to take an vaddr_pin struct ira.weiny
2019-08-10  0:30   ` John Hubbard
2019-08-12 20:46     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 13/19] {mm,file}: Add file_pins objects ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 14/19] fs/locks: Associate file pins while performing GUP ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 15/19] mm/gup: Introduce vaddr_pin_pages() ira.weiny
2019-08-10  0:09   ` John Hubbard
2019-08-12 21:00     ` Ira Weiny
2019-08-12 21:20       ` John Hubbard
2019-08-11 23:07   ` John Hubbard
2019-08-12 21:01     ` Ira Weiny
2019-08-12 12:28   ` Jason Gunthorpe
2019-08-12 21:48     ` Ira Weiny
2019-08-13 11:47       ` Jason Gunthorpe
2019-08-13 17:46         ` Ira Weiny
2019-08-13 17:56           ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 16/19] RDMA/uverbs: Add back pointer to system file object ira.weiny
2019-08-12 13:00   ` Jason Gunthorpe
2019-08-12 17:28     ` Ira Weiny
2019-08-12 17:56       ` Jason Gunthorpe
2019-08-12 21:15         ` Ira Weiny
2019-08-13 11:48           ` Jason Gunthorpe
2019-08-13 17:41             ` Ira Weiny
2019-08-13 18:00               ` Jason Gunthorpe
2019-08-13 20:38                 ` Ira Weiny
2019-08-14 12:23                   ` Jason Gunthorpe
2019-08-14 17:50                     ` Ira Weiny
2019-08-14 18:15                       ` Jason Gunthorpe
2019-09-04 22:25                     ` Ira Weiny
2019-09-11  8:19                       ` Jason Gunthorpe
2019-08-09 22:58 ` ira.weiny [this message]
2019-08-09 22:58 ` [RFC PATCH v2 18/19] {mm,procfs}: Add display file_pins proc ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 19/19] mm/gup: Remove FOLL_LONGTERM DAX exclusion ira.weiny
2019-08-14 10:17 ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) Jan Kara
2019-08-14 18:08   ` Ira Weiny
2019-08-15 13:05     ` Jan Kara
2019-08-16 19:05       ` Ira Weiny
2019-08-16 23:20         ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ; -) Ira Weiny
2019-08-19  6:36           ` Jan Kara
2019-08-17  2:26         ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) Dave Chinner
2019-08-19  6:34           ` Jan Kara
2019-08-19  9:24             ` Dave Chinner
2019-08-19 12:38               ` Jason Gunthorpe
2019-08-19 21:53                 ` Ira Weiny
2019-08-20  1:12                 ` Dave Chinner
2019-08-20 11:55                   ` Jason Gunthorpe
2019-08-21 18:02                     ` Ira Weiny
2019-08-21 18:13                       ` Jason Gunthorpe
2019-08-21 18:22                         ` John Hubbard
2019-08-21 18:57                         ` Ira Weiny
2019-08-21 19:06                           ` Ira Weiny
2019-08-21 19:48                           ` Jason Gunthorpe
2019-08-21 20:44                             ` Ira Weiny
2019-08-21 23:49                               ` Jason Gunthorpe
2019-08-23  3:23                               ` Dave Chinner
2019-08-23 12:04                                 ` Jason Gunthorpe
2019-08-24  0:11                                   ` Dave Chinner
2019-08-24  5:08                                     ` Ira Weiny
2019-08-26  5:55                                       ` Dave Chinner
2019-08-29  2:02                                         ` Ira Weiny
2019-08-29  3:27                                           ` John Hubbard
2019-08-29 16:16                                             ` Ira Weiny
2019-09-02 22:26                                           ` Dave Chinner
2019-09-04 16:54                                             ` Ira Weiny
2019-08-25 19:39                                     ` Jason Gunthorpe
2019-08-24  4:49                                 ` Ira Weiny
2019-08-25 19:40                                   ` Jason Gunthorpe
2019-08-23  0:59                       ` Dave Chinner
2019-08-23 17:15                         ` Ira Weiny
2019-08-24  0:18                           ` Dave Chinner
2019-08-20  0:05               ` John Hubbard
2019-08-20  1:20                 ` Dave Chinner
2019-08-20  3:09                   ` John Hubbard
2019-08-20  3:36                     ` Dave Chinner
2019-08-21 18:43                       ` John Hubbard
2019-08-21 19:09                         ` Ira Weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190809225833.6657-18-ira.weiny@intel.com \
    --to=ira.weiny@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=tytso@mit.edu \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).