linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ira.weiny@intel.com
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>,
	Dan Williams <dan.j.williams@intel.com>,
	Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	"Theodore Ts'o" <tytso@mit.edu>,
	John Hubbard <jhubbard@nvidia.com>,
	Michal Hocko <mhocko@suse.com>,
	Dave Chinner <david@fromorbit.com>,
	linux-xfs@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org,
	linux-mm@kvack.org, Ira Weiny <ira.weiny@intel.com>
Subject: [RFC PATCH v2 04/19] mm/gup: Ensure F_LAYOUT lease is held prior to GUP'ing pages
Date: Fri,  9 Aug 2019 15:58:18 -0700	[thread overview]
Message-ID: <20190809225833.6657-5-ira.weiny@intel.com> (raw)
In-Reply-To: <20190809225833.6657-1-ira.weiny@intel.com>

From: Ira Weiny <ira.weiny@intel.com>

On FS DAX files users must inform the file system they intend to take
long term GUP pins on the file pages.  Failure to do so should result in
an error.

Ensure that a F_LAYOUT lease exists at the time the GUP call is made.
If not return EPERM.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from RFC v1:

    The old version had remnants of when GUP was going to take the lease
    for the user.  Remove this prototype code.
    Fix issue in gup_device_huge which was setting page reference prior
    to checking for Layout Lease
    Re-base to 5.3+
    Clean up htmldoc comments

 fs/locks.c         | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  2 ++
 mm/gup.c           | 23 +++++++++++++++++++++++
 mm/huge_memory.c   | 12 ++++++++++++
 4 files changed, 84 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 0c7359cdab92..14892c84844b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2971,3 +2971,50 @@ static int __init filelock_init(void)
 	return 0;
 }
 core_initcall(filelock_init);
+
+/**
+ * mapping_inode_has_layout - ensure a file mapped page has a layout lease
+ * taken
+ * @page: page we are trying to GUP
+ *
+ * This should only be called on DAX pages.  DAX pages which are mapped through
+ * FS DAX do not use the page cache.  As a result they require the user to take
+ * a LAYOUT lease on them prior to be able to pin them for longterm use.
+ * This allows the user to opt-into the fact that truncation operations will
+ * fail for the duration of the pin.
+ *
+ * Return true if the page has a LAYOUT lease associated with it's file.
+ */
+bool mapping_inode_has_layout(struct page *page)
+{
+	bool ret = false;
+	struct inode *inode;
+	struct file_lock *fl;
+
+	if (WARN_ON(PageAnon(page)) ||
+	    WARN_ON(!page) ||
+	    WARN_ON(!page->mapping) ||
+	    WARN_ON(!page->mapping->host))
+		return false;
+
+	inode = page->mapping->host;
+
+	smp_mb();
+	if (inode->i_flctx &&
+	    !list_empty_careful(&inode->i_flctx->flc_lease)) {
+		spin_lock(&inode->i_flctx->flc_lock);
+		ret = false;
+		list_for_each_entry(fl, &inode->i_flctx->flc_lease, fl_list) {
+			if (fl->fl_pid == current->tgid &&
+			    (fl->fl_flags & FL_LAYOUT) &&
+			    (fl->fl_flags & FL_EXCLUSIVE)) {
+				ret = true;
+				break;
+			}
+		}
+		spin_unlock(&inode->i_flctx->flc_lock);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mapping_inode_has_layout);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad6766a08f9b..04f22722b374 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1583,6 +1583,8 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
 int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
 			struct task_struct *task, bool bypass_rlim);
 
+bool mapping_inode_has_layout(struct page *page);
+
 /* Container for pinned pfns / pages */
 struct frame_vector {
 	unsigned int nr_allocated;	/* Number of frames we have space for */
diff --git a/mm/gup.c b/mm/gup.c
index 80423779a50a..0b05e22ac05f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -221,6 +221,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = pte_page(pte);
 		else
 			goto no_page;
+
+		if (unlikely(flags & FOLL_LONGTERM) &&
+		    (*pgmap)->type == MEMORY_DEVICE_FS_DAX &&
+		    !mapping_inode_has_layout(page)) {
+			page = ERR_PTR(-EPERM);
+			goto out;
+		}
 	} else if (unlikely(!page)) {
 		if (flags & FOLL_DUMP) {
 			/* Avoid special (like zero) pages in core dumps */
@@ -1847,6 +1854,14 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
+		if (pte_devmap(pte) &&
+		    unlikely(flags & FOLL_LONGTERM) &&
+		    pgmap->type == MEMORY_DEVICE_FS_DAX &&
+		    !mapping_inode_has_layout(head)) {
+			put_user_page(head);
+			goto pte_unmap;
+		}
+
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
@@ -1895,6 +1910,14 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 			undo_dev_pagemap(nr, nr_start, pages);
 			return 0;
 		}
+
+		if (unlikely(flags & FOLL_LONGTERM) &&
+		    pgmap->type == MEMORY_DEVICE_FS_DAX &&
+		    !mapping_inode_has_layout(page)) {
+			undo_dev_pagemap(nr, nr_start, pages);
+			return 0;
+		}
+
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		get_page(page);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1334ede667a8..bc1a07a55be1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -953,6 +953,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
+
+	if (unlikely(flags & FOLL_LONGTERM) &&
+	    (*pgmap)->type == MEMORY_DEVICE_FS_DAX &&
+	    !mapping_inode_has_layout(page))
+		return ERR_PTR(-EPERM);
+
 	get_page(page);
 
 	return page;
@@ -1093,6 +1099,12 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
+
+	if (unlikely(flags & FOLL_LONGTERM) &&
+	    (*pgmap)->type == MEMORY_DEVICE_FS_DAX &&
+	    !mapping_inode_has_layout(page))
+		return ERR_PTR(-EPERM);
+
 	get_page(page);
 
 	return page;
-- 
2.20.1


  parent reply	other threads:[~2019-08-09 23:00 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-09 22:58 [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 01/19] fs/locks: Export F_LAYOUT lease to user space ira.weiny
2019-08-09 23:52   ` Dave Chinner
2019-08-12 17:36     ` Ira Weiny
2019-08-14  8:05       ` Dave Chinner
2019-08-14 11:21         ` Jeff Layton
2019-08-14 11:38           ` Dave Chinner
2019-08-09 22:58 ` [RFC PATCH v2 02/19] fs/locks: Add Exclusive flag to user Layout lease ira.weiny
2019-08-14 14:15   ` Jeff Layton
2019-08-14 21:56     ` Dave Chinner
2019-08-26 10:41       ` Jeff Layton
2019-08-29 23:34         ` Ira Weiny
2019-09-04 12:52           ` Jeff Layton
2019-09-04 23:12   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 03/19] mm/gup: Pass flags down to __gup_device_huge* calls ira.weiny
2019-08-09 22:58 ` ira.weiny [this message]
2019-08-09 22:58 ` [RFC PATCH v2 05/19] fs/ext4: Teach ext4 to break layout leases ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 06/19] fs/ext4: Teach dax_layout_busy_page() to operate on a sub-range ira.weiny
2019-08-23 15:18   ` Vivek Goyal
2019-08-29 18:52     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 07/19] fs/xfs: Teach xfs to use new dax_layout_busy_page() ira.weiny
2019-08-09 23:30   ` Dave Chinner
2019-08-12 18:05     ` Ira Weiny
2019-08-14  8:04       ` Dave Chinner
2019-08-09 22:58 ` [RFC PATCH v2 08/19] fs/xfs: Fail truncate if page lease can't be broken ira.weiny
2019-08-09 23:22   ` Dave Chinner
2019-08-12 18:08     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 09/19] mm/gup: Introduce vaddr_pin structure ira.weiny
2019-08-10  0:06   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 10/19] mm/gup: Pass a NULL vaddr_pin through GUP fast ira.weiny
2019-08-10  0:06   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 11/19] mm/gup: Pass follow_page_context further down the call stack ira.weiny
2019-08-10  0:18   ` John Hubbard
2019-08-12 19:01     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 12/19] mm/gup: Prep put_user_pages() to take an vaddr_pin struct ira.weiny
2019-08-10  0:30   ` John Hubbard
2019-08-12 20:46     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 13/19] {mm,file}: Add file_pins objects ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 14/19] fs/locks: Associate file pins while performing GUP ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 15/19] mm/gup: Introduce vaddr_pin_pages() ira.weiny
2019-08-10  0:09   ` John Hubbard
2019-08-12 21:00     ` Ira Weiny
2019-08-12 21:20       ` John Hubbard
2019-08-11 23:07   ` John Hubbard
2019-08-12 21:01     ` Ira Weiny
2019-08-12 12:28   ` Jason Gunthorpe
2019-08-12 21:48     ` Ira Weiny
2019-08-13 11:47       ` Jason Gunthorpe
2019-08-13 17:46         ` Ira Weiny
2019-08-13 17:56           ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 16/19] RDMA/uverbs: Add back pointer to system file object ira.weiny
2019-08-12 13:00   ` Jason Gunthorpe
2019-08-12 17:28     ` Ira Weiny
2019-08-12 17:56       ` Jason Gunthorpe
2019-08-12 21:15         ` Ira Weiny
2019-08-13 11:48           ` Jason Gunthorpe
2019-08-13 17:41             ` Ira Weiny
2019-08-13 18:00               ` Jason Gunthorpe
2019-08-13 20:38                 ` Ira Weiny
2019-08-14 12:23                   ` Jason Gunthorpe
2019-08-14 17:50                     ` Ira Weiny
2019-08-14 18:15                       ` Jason Gunthorpe
2019-09-04 22:25                     ` Ira Weiny
2019-09-11  8:19                       ` Jason Gunthorpe
2019-08-09 22:58 ` [RFC PATCH v2 17/19] RDMA/umem: Convert to vaddr_[pin|unpin]* operations ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 18/19] {mm,procfs}: Add display file_pins proc ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 19/19] mm/gup: Remove FOLL_LONGTERM DAX exclusion ira.weiny
2019-08-14 10:17 ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) Jan Kara
2019-08-14 18:08   ` Ira Weiny
2019-08-15 13:05     ` Jan Kara
2019-08-16 19:05       ` Ira Weiny
2019-08-16 23:20         ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ; -) Ira Weiny
2019-08-19  6:36           ` Jan Kara
2019-08-17  2:26         ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) Dave Chinner
2019-08-19  6:34           ` Jan Kara
2019-08-19  9:24             ` Dave Chinner
2019-08-19 12:38               ` Jason Gunthorpe
2019-08-19 21:53                 ` Ira Weiny
2019-08-20  1:12                 ` Dave Chinner
2019-08-20 11:55                   ` Jason Gunthorpe
2019-08-21 18:02                     ` Ira Weiny
2019-08-21 18:13                       ` Jason Gunthorpe
2019-08-21 18:22                         ` John Hubbard
2019-08-21 18:57                         ` Ira Weiny
2019-08-21 19:06                           ` Ira Weiny
2019-08-21 19:48                           ` Jason Gunthorpe
2019-08-21 20:44                             ` Ira Weiny
2019-08-21 23:49                               ` Jason Gunthorpe
2019-08-23  3:23                               ` Dave Chinner
2019-08-23 12:04                                 ` Jason Gunthorpe
2019-08-24  0:11                                   ` Dave Chinner
2019-08-24  5:08                                     ` Ira Weiny
2019-08-26  5:55                                       ` Dave Chinner
2019-08-29  2:02                                         ` Ira Weiny
2019-08-29  3:27                                           ` John Hubbard
2019-08-29 16:16                                             ` Ira Weiny
2019-09-02 22:26                                           ` Dave Chinner
2019-09-04 16:54                                             ` Ira Weiny
2019-08-25 19:39                                     ` Jason Gunthorpe
2019-08-24  4:49                                 ` Ira Weiny
2019-08-25 19:40                                   ` Jason Gunthorpe
2019-08-23  0:59                       ` Dave Chinner
2019-08-23 17:15                         ` Ira Weiny
2019-08-24  0:18                           ` Dave Chinner
2019-08-20  0:05               ` John Hubbard
2019-08-20  1:20                 ` Dave Chinner
2019-08-20  3:09                   ` John Hubbard
2019-08-20  3:36                     ` Dave Chinner
2019-08-21 18:43                       ` John Hubbard
2019-08-21 19:09                         ` Ira Weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190809225833.6657-5-ira.weiny@intel.com \
    --to=ira.weiny@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=tytso@mit.edu \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).