linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ira.weiny@intel.com
To: lsf-pc@lists.linux-foundation.org
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, "Dan Williams" <dan.j.williams@intel.com>,
	"Jan Kara" <jack@suse.cz>, "Jérôme Glisse" <jglisse@redhat.com>,
	"John Hubbard" <jhubbard@nvidia.com>,
	"Michal Hocko" <mhocko@suse.com>,
	"Ira Weiny" <ira.weiny@intel.com>
Subject: [RFC PATCH 04/10] WIP: mm/gup: Ensure F_LONGTERM lease is held on GUP pages
Date: Sun, 28 Apr 2019 21:53:53 -0700	[thread overview]
Message-ID: <20190429045359.8923-5-ira.weiny@intel.com> (raw)
In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com>

From: Ira Weiny <ira.weiny@intel.com>

Honestly I think I should remove this patch.  It is removed later in the
series and ensuring the lease is there at GUP time does not guarantee
the lease is held.  The user could remove the lease???

Regardless the code in GUP to take the lease holds it even if the user
does try to remove it and will take the lease back if they race and the
lease is removed prior to the GUP getting a reference to it...

So pretty much any way you slice it this patch is not needed...

FOLL_LONGTERM pins are currently disabled for GUP calls which map to FS
DAX files.  As an alternative allow these files to be mapped if the user
has taken a F_LONGTERM lease on the file.

The intention is that the user is aware of the dangers of file
truncation/hole punch on a file which has been mapped this way (such as
is done with RDMA), and that they have taken this lease to indicate they
will accept the behavior if the filesystem needs to take action.

Example user space pseudocode for a user using RDMA and reacting to a
lease break of this type would look like this:

    lease_break() {
    ...
            if (sigio.fd == rdma_fd) {
                    ibv_dereg_mr(mr);
                    close(rdma_fd);
            }
    }

    foo() {
            rdma_fd = open()
            fcntl(rdma_fd, F_SETLEASE, F_LONGTERM);
            sigaction(SIGIO, ...  lease_break ...);
            ptr = mmap(rdma_fd, ...);
            mr = ibv_reg_mr(ptr, ...);
    }

Failure to process the SIGIO as above will result in a SIGBUS being
given to the process.  SIGBUS is implemented in later patches.

This patch (04 of 10) fails the FOLL_LONGTERM pin if the FL_LONGTERM
lease is not held.
---
 fs/locks.c         | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  2 ++
 mm/gup.c           | 13 +++++++++++++
 mm/huge_memory.c   | 20 ++++++++++++++++++++
 4 files changed, 82 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 8ea1c5713e6a..31c8b761a578 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2939,3 +2939,50 @@ static int __init filelock_init(void)
 	return 0;
 }
 core_initcall(filelock_init);
+
+// FIXME what about GUP calls to Device DAX???
+// I believe they will still return true for *_devmap
+//
+// Return true if the file backing @page has an FL_LONGTERM lease.  Only an
+// existing lock context is examined; none is allocated here.
+bool mapping_inode_has_longterm(struct page *page)
+{
+	bool ret = false;
+	struct inode *inode;
+	struct file_lock *fl;
+	struct address_space *mapping;
+	struct file_lock_context *ctx;
+
+	/*
+	 * should never be here unless we are a "page cache" page without a
+	 * page cache.  Test for NULL before PageAnon() is handed @page.
+	 */
+	if (WARN_ON(!page) || WARN_ON(PageAnon(page)))
+		return false;
+
+	/* Ensure page->mapping isn't freed while we look at it */
+	/* FIXME mm lock is held here I think?  so is this really needed? */
+	rcu_read_lock();
+	mapping = READ_ONCE(page->mapping);
+	if (WARN_ON(!mapping) || WARN_ON(!mapping->host))
+		goto out;
+	inode = mapping->host;
+
+	ctx = smp_load_acquire(&inode->i_flctx);
+	if (!ctx)
+		goto out;
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+		if (fl->fl_flags & FL_LONGTERM) {
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock(&ctx->flc_lock);
+out:
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mapping_inode_has_longterm);
+
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 77e34ec5dfbe..cde359e71b7b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1572,6 +1572,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 int get_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages);
 
+bool mapping_inode_has_longterm(struct page *page);
+
 /* Container for pinned pfns / pages */
 struct frame_vector {
 	unsigned int nr_allocated;	/* Number of frames we have space for */
diff --git a/mm/gup.c b/mm/gup.c
index a8ac75bc1452..5ae1dd31a58d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -292,6 +292,12 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = pte_page(pte);
 		else
 			goto no_page;
+
+		if (unlikely(flags & FOLL_LONGTERM) &&
+		    !mapping_inode_has_longterm(page)) {
+			page = ERR_PTR(-EINVAL);
+			goto out;
+		}
 	} else if (unlikely(!page)) {
 		if (flags & FOLL_DUMP) {
 			/* Avoid special (like zero) pages in core dumps */
@@ -1869,6 +1875,13 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 		}
 		SetPageReferenced(page);
 		pages[*nr] = page;
+
+		if (unlikely(flags & FOLL_LONGTERM) &&
+		    !mapping_inode_has_longterm(page)) {
+			undo_dev_pagemap(nr, nr_start, pages);
+			return 0;
+		}
+
 		if (get_gup_pin_page(page)) {
 			undo_dev_pagemap(nr, nr_start, pages);
 			return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 404acdcd0455..8819624c740f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -910,6 +910,16 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
+
+	// Check for Layout lease.
+	// FIXME combine logic
+	if (unlikely(flags & FOLL_LONGTERM)) {
+		WARN_ON_ONCE(PageAnon(page));
+		if (!mapping_inode_has_longterm(page)) {
+			return NULL;
+		}
+	}
+
 	get_page(page);
 
 	return page;
@@ -1050,6 +1060,16 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
+
+	// Check for LONGTERM lease.
+	// FIXME combine logic remove Warn
+	if (unlikely(flags & FOLL_LONGTERM)) {
+		WARN_ON_ONCE(PageAnon(page));
+		if (!mapping_inode_has_longterm(page)) {
+			return NULL;
+		}
+	}
+
 	get_page(page);
 
 	return page;
-- 
2.20.1


  parent reply	other threads:[~2019-04-29  4:54 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-29  4:53 [RFC PATCH 00/10] RDMA/FS DAX "LONGTERM" lease proposal ira.weiny
2019-04-29  4:53 ` [RFC PATCH 01/10] fs/locks: Add trace_leases_conflict ira.weiny
2019-04-29  4:53 ` [RFC PATCH 02/10] fs/locks: Introduce FL_LONGTERM file lease ira.weiny
2019-04-29  4:53 ` [RFC PATCH 03/10] mm/gup: Pass flags down to __gup_device_huge* calls ira.weiny
2019-04-29  4:53 ` ira.weiny [this message]
2019-04-29  4:53 ` [RFC PATCH 05/10] mm/gup: Take FL_LONGTERM lease if not set by user ira.weiny
2019-04-29  4:53 ` [RFC PATCH 06/10] fs/locks: Add longterm lease traces ira.weiny
2019-04-29  4:53 ` [RFC PATCH 07/10] fs/dax: Create function dax_mapping_is_dax() ira.weiny
2019-04-29  4:53 ` [RFC PATCH 08/10] mm/gup: fs: Send SIGBUS on truncate of active file ira.weiny
2019-04-29  4:53 ` [RFC PATCH 09/10] fs/locks: Add tracepoint for SIGBUS on LONGTERM expiration ira.weiny
2019-04-29  4:53 ` [RFC PATCH 10/10] mm/gup: Remove FOLL_LONGTERM DAX exclusion ira.weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190429045359.8923-5-ira.weiny@intel.com \
    --to=ira.weiny@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=jack@suse.cz \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lsf-pc@lists.linux-foundation.org \
    --cc=mhocko@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).