All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrea Arcangeli <aarcange@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, Michael Rapoport <RAPOPORT@il.ibm.com>,
	"Dr. David Alan Gilbert"@v2.random, " <dgilbert@redhat.com>,
	 Mike Kravetz <mike.kravetz@oracle.com>,
	 Shaohua Li <shli@fb.com>,
	 Pavel Emelyanov <xemul@parallels.com>"@v2.random
Subject: [PATCH 17/33] userfaultfd: hugetlbfs: allow registration of ranges containing huge pages
Date: Wed,  2 Nov 2016 20:33:49 +0100	[thread overview]
Message-ID: <1478115245-32090-18-git-send-email-aarcange@redhat.com> (raw)
In-Reply-To: <1478115245-32090-1-git-send-email-aarcange@redhat.com>

From: Mike Kravetz <mike.kravetz@oracle.com>

Expand the userfaultfd_register/unregister routines to allow VM_HUGETLB
vmas.  huge page alignment checking is performed after a VM_HUGETLB
vma is encountered.

Also, since there is no UFFDIO_ZEROPAGE support for huge pages do not
return that as a valid ioctl method for huge page ranges.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 fs/userfaultfd.c                 | 55 ++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/userfaultfd.h |  3 +++
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 9357dcf..a73e999 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -27,6 +27,7 @@
 #include <linux/mempolicy.h>
 #include <linux/ioctl.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
@@ -1021,6 +1022,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	struct uffdio_register __user *user_uffdio_register;
 	unsigned long vm_flags, new_flags;
 	bool found;
+	bool huge_pages;
 	unsigned long start, end, vma_end;
 
 	user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1072,6 +1074,17 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		goto out_unlock;
 
 	/*
+	 * If the first vma contains huge pages, make sure start address
+	 * is aligned to huge page size.
+	 */
+	if (is_vm_hugetlb_page(vma)) {
+		unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
+
+		if (start & (vma_hpagesize - 1))
+			goto out_unlock;
+	}
+
+	/*
 	 * Search for not compatible vmas.
 	 *
 	 * FIXME: this shall be relaxed later so that it doesn't fail
@@ -1079,6 +1092,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	 * on anonymous vmas).
 	 */
 	found = false;
+	huge_pages = false;
 	for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
 		cond_resched();
 
@@ -1087,8 +1101,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 
 		/* check not compatible vmas */
 		ret = -EINVAL;
-		if (!vma_is_anonymous(cur))
+		if (!vma_is_anonymous(cur) && !is_vm_hugetlb_page(cur))
 			goto out_unlock;
+		/*
+		 * If this vma contains ending address, and huge pages
+		 * check alignment.
+		 */
+		if (is_vm_hugetlb_page(cur) && end <= cur->vm_end &&
+		    end > cur->vm_start) {
+			unsigned long vma_hpagesize = vma_kernel_pagesize(cur);
+
+			ret = -EINVAL;
+
+			if (end & (vma_hpagesize - 1))
+				goto out_unlock;
+		}
 
 		/*
 		 * Check that this vma isn't already owned by a
@@ -1101,6 +1128,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		    cur->vm_userfaultfd_ctx.ctx != ctx)
 			goto out_unlock;
 
+		/*
+		 * Note vmas containing huge pages
+		 */
+		if (is_vm_hugetlb_page(cur))
+			huge_pages = true;
+
 		found = true;
 	}
 	BUG_ON(!found);
@@ -1112,7 +1145,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	do {
 		cond_resched();
 
-		BUG_ON(!vma_is_anonymous(vma));
+		BUG_ON(!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma));
 		BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
 		       vma->vm_userfaultfd_ctx.ctx != ctx);
 
@@ -1170,7 +1203,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		 * userland which ioctls methods are guaranteed to
 		 * succeed on this range.
 		 */
-		if (put_user(UFFD_API_RANGE_IOCTLS,
+		if (put_user(huge_pages ? UFFD_API_RANGE_IOCTLS_HPAGE :
+			     UFFD_API_RANGE_IOCTLS,
 			     &user_uffdio_register->ioctls))
 			ret = -EFAULT;
 	}
@@ -1217,6 +1251,17 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		goto out_unlock;
 
 	/*
+	 * If the first vma contains huge pages, make sure start address
+	 * is aligned to huge page size.
+	 */
+	if (is_vm_hugetlb_page(vma)) {
+		unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
+
+		if (start & (vma_hpagesize - 1))
+			goto out_unlock;
+	}
+
+	/*
 	 * Search for not compatible vmas.
 	 *
 	 * FIXME: this shall be relaxed later so that it doesn't fail
@@ -1238,7 +1283,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		 * provides for more strict behavior to notice
 		 * unregistration errors.
 		 */
-		if (!vma_is_anonymous(cur))
+		if (!vma_is_anonymous(cur) && !is_vm_hugetlb_page(cur))
 			goto out_unlock;
 
 		found = true;
@@ -1252,7 +1297,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	do {
 		cond_resched();
 
-		BUG_ON(!vma_is_anonymous(vma));
+		BUG_ON(!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 2bbf323..a3828a9 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -29,6 +29,9 @@
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
 	 (__u64)1 << _UFFDIO_ZEROPAGE)
+#define UFFD_API_RANGE_IOCTLS_HPAGE		\
+	((__u64)1 << _UFFDIO_WAKE |		\
+	 (__u64)1 << _UFFDIO_COPY)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-11-02 19:34 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-02 19:33 [PATCH 00/33] userfaultfd tmpfs/hugetlbfs/non-cooperative Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 01/33] userfaultfd: document _IOR/_IOW Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 02/33] userfaultfd: correct comment about UFFD_FEATURE_PAGEFAULT_FLAG_WP Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 03/33] userfaultfd: convert BUG() to WARN_ON_ONCE() Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 04/33] userfaultfd: use vma_is_anonymous Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 05/33] userfaultfd: non-cooperative: Split the find_userfault() routine Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 06/33] userfaultfd: non-cooperative: Add ability to report non-PF events from uffd descriptor Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 07/33] userfaultfd: non-cooperative: report all available features to userland Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 08/33] userfaultfd: non-cooperative: Add fork() event Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 09/33] userfaultfd: non-cooperative: Add fork() event, build warning fix Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 10/33] userfaultfd: non-cooperative: dup_userfaultfd: use mm_count instead of mm_users Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 11/33] userfaultfd: non-cooperative: Add mremap() event Andrea Arcangeli
2016-11-03  7:41   ` Hillf Danton
2016-11-03 17:52     ` Mike Rapoport
2016-11-04 15:40     ` Mike Rapoport
2016-11-02 19:33 ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED request Andrea Arcangeli
2016-11-03  8:01   ` Hillf Danton
2016-11-03 17:24     ` Mike Rapoport
2016-11-04 16:40       ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED requestg Andrea Arcangeli
2016-11-04 15:42     ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED request Mike Rapoport
2016-11-02 19:33 ` [PATCH 13/33] userfaultfd: hugetlbfs: add copy_huge_page_from_user for hugetlb userfaultfd support Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 14/33] userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for " Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 15/33] userfaultfd: hugetlbfs: add __mcopy_atomic_hugetlb for huge page UFFDIO_COPY Andrea Arcangeli
2016-11-03 10:15   ` Hillf Danton
2016-11-03 17:33     ` Mike Kravetz
2016-11-03 19:14       ` Mike Kravetz
2016-11-04  6:43         ` Hillf Danton
2016-11-04 19:36         ` Andrea Arcangeli
2016-11-04 20:34           ` Mike Kravetz
2016-11-08 21:06           ` Mike Kravetz
2016-11-16 18:28             ` Andrea Arcangeli
2016-11-16 18:53               ` Mike Kravetz
2016-11-17 15:40                 ` Andrea Arcangeli
2016-11-17 19:26                   ` Mike Kravetz
2016-11-18  0:05                     ` Andrea Arcangeli
2016-11-18  5:52                       ` Mike Kravetz
2016-11-22  1:16                         ` Mike Kravetz
2016-11-23  6:38                           ` Hillf Danton
2016-12-15 19:02                             ` Andrea Arcangeli
2016-12-16  3:54                               ` Hillf Danton
2016-11-17 19:41               ` Mike Kravetz
2016-11-04 16:35       ` Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 16/33] userfaultfd: hugetlbfs: add userfaultfd hugetlb hook Andrea Arcangeli
2016-11-04  7:02   ` Hillf Danton
2016-11-02 19:33 ` Andrea Arcangeli [this message]
2016-11-02 19:33 ` [PATCH 18/33] userfaultfd: hugetlbfs: add userfaultfd_hugetlb test Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 19/33] userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 20/33] userfaultfd: introduce vma_can_userfault Andrea Arcangeli
2016-11-04  7:39   ` Hillf Danton
2016-11-02 19:33 ` [PATCH 21/33] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 22/33] userfaultfd: shmem: introduce vma_is_shmem Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 23/33] userfaultfd: shmem: add tlbflush.h header for microblaze Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 24/33] userfaultfd: shmem: use shmem_mcopy_atomic_pte for shared memory Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 25/33] userfaultfd: shmem: add userfaultfd hook for shared memory faults Andrea Arcangeli
2016-11-04  8:59   ` Hillf Danton
2016-11-04 14:53     ` Mike Rapoport
2016-11-04 15:44     ` Mike Rapoport
2016-11-04 16:56       ` Andrea Arcangeli
2016-11-18  0:37       ` Andrea Arcangeli
2016-11-20 12:10         ` Mike Rapoport
2016-11-02 19:33 ` [PATCH 26/33] userfaultfd: shmem: allow registration of shared memory ranges Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 27/33] userfaultfd: shmem: add userfaultfd_shmem test Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 28/33] userfaultfd: shmem: lock the page before adding it to pagecache Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 29/33] userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 30/33] userfaultfd: non-cooperative: selftest: introduce userfaultfd_open Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 31/33] userfaultfd: non-cooperative: selftest: add ufd parameter to copy_page Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 32/33] userfaultfd: non-cooperative: selftest: add test for FORK, MADVDONTNEED and REMAP events Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 33/33] mm: mprotect: use pmd_trans_unstable instead of taking the pmd_lock Andrea Arcangeli
2016-11-02 20:07 ` [PATCH 00/33] userfaultfd tmpfs/hugetlbfs/non-cooperative Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1478115245-32090-18-git-send-email-aarcange@redhat.com \
    --to=aarcange@redhat.com \
    --cc=" <dgilbert@redhat.com>,  Mike Kravetz <mike.kravetz@oracle.com>,  Shaohua Li <shli@fb.com>,  Pavel Emelyanov <xemul@parallels.com>"@v2.random \
    --cc="Dr. David Alan Gilbert"@v2.random \
    --cc=RAPOPORT@il.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.