All of lore.kernel.org
 help / color / mirror / Atom feed
* [to-be-updated] mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy.patch removed from -mm tree
@ 2021-06-01  0:32 akpm
  0 siblings, 0 replies; 2+ messages in thread
From: akpm @ 2021-06-01  0:32 UTC (permalink / raw)
  To: almasrymina, axelrasmussen, mike.kravetz, mm-commits, peterx


The patch titled
     Subject: mm, hugetlb: fix resv_huge_pages underflow on UFFDIO_COPY
has been removed from the -mm tree.  Its filename was
     mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy.patch

This patch was dropped because an updated version will be merged

------------------------------------------------------
From: Mina Almasry <almasrymina@google.com>
Subject: mm, hugetlb: fix resv_huge_pages underflow on UFFDIO_COPY

The userfaultfd hugetlb tests detect a resv_huge_pages underflow.  This
happens when hugetlb_mcopy_atomic_pte() is called with !is_continue on an
index for which we already have a page in the cache.  When this happens,
we allocate a second page, double consuming the reservation, and then fail
to insert the page into the cache and return -EEXIST.

To fix this, we first check if there exists a page in the cache which
already consumed the reservation, and return -EEXIST immediately if so.

Secondly, if we fail to copy the page contents while holding the
hugetlb_fault_mutex, we will drop the mutex and return to the caller after
allocating a page that consumed a reservation.  In this case there may be
a fault that double consumes the reservation.  To handle this, we free the
allocated page, fix the reservations, and allocate a temporary hugetlb
page and return that to the caller.  When the caller does the copy outside
of the lock, we again check the cache, and allocate a page consuming the
reservation, and copy over the contents.

Test:
Hacked the code locally such that resv_huge_pages underflows produce
a warning and the copy_huge_page_from_user() always fails, then:

./tools/testing/selftests/vm/userfaultfd hugetlb_shared 10
	2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success
./tools/testing/selftests/vm/userfaultfd hugetlb 10
	2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success

Both tests succeed and produce no warnings. After the test runs
number of free/resv hugepages is correct.

[akpm@linux-foundation.org: fix warning]
Link: https://lkml.kernel.org/r/20210521074433.931380-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/hugetlb.h |    4 +
 mm/hugetlb.c            |  103 ++++++++++++++++++++++++++++++++++----
 mm/migrate.c            |   39 ++------------
 3 files changed, 103 insertions(+), 43 deletions(-)

--- a/include/linux/hugetlb.h~mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy
+++ a/include/linux/hugetlb.h
@@ -194,6 +194,8 @@ unsigned long hugetlb_change_protection(
 bool is_hugetlb_entry_migration(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 
+void hugetlb_copy_page(struct page *dst, struct page *src);
+
 #else /* !CONFIG_HUGETLB_PAGE */
 
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
@@ -379,6 +381,8 @@ static inline vm_fault_t hugetlb_fault(s
 
 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 
+static inline void hugetlb_copy_page(struct page *dst, struct page *src) { }
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
  * hugepages at page global directory. If arch support
--- a/mm/hugetlb.c~mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy
+++ a/mm/hugetlb.c
@@ -81,6 +81,45 @@ struct mutex *hugetlb_fault_mutex_table
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 
+/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+				 int nr_pages)
+{
+	int i;
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+
+	for (i = 0; i < nr_pages;) {
+		cond_resched();
+		copy_highpage(dst, src);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
+
+void hugetlb_copy_page(struct page *dst, struct page *src)
+{
+	int i;
+	struct hstate *h = page_hstate(src);
+	int nr_pages = pages_per_huge_page(h);
+
+	if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+		__copy_gigantic_page(dst, src, nr_pages);
+		return;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		copy_highpage(dst + i, src + i);
+	}
+}
+
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -4869,19 +4908,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
 			    struct page **pagep)
 {
 	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
-	struct address_space *mapping;
-	pgoff_t idx;
+	struct hstate *h = hstate_vma(dst_vma);
+	struct address_space *mapping = dst_vma->vm_file->f_mapping;
+	pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
 	unsigned long size;
 	int vm_shared = dst_vma->vm_flags & VM_SHARED;
-	struct hstate *h = hstate_vma(dst_vma);
 	pte_t _dst_pte;
 	spinlock_t *ptl;
-	int ret;
+	int ret = -ENOMEM;
 	struct page *page;
 	int writable;
-
-	mapping = dst_vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+	struct mempolicy *mpol;
+	nodemask_t *nodemask;
+	gfp_t gfp_mask = htlb_alloc_mask(h);
+	int node = huge_node(dst_vma, dst_addr, gfp_mask, &mpol, &nodemask);
 
 	if (is_continue) {
 		ret = -EFAULT;
@@ -4889,7 +4929,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
 		if (!page)
 			goto out;
 	} else if (!*pagep) {
-		ret = -ENOMEM;
+		/* If a page already exists, then it's UFFDIO_COPY for
+		 * a non-missing case. Return -EEXIST.
+		 */
+		if (hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
+			ret = -EEXIST;
+			goto out;
+		}
+
 		page = alloc_huge_page(dst_vma, dst_addr, 0);
 		if (IS_ERR(page))
 			goto out;
@@ -4901,12 +4948,48 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
 		/* fallback to copy_from_user outside mmap_lock */
 		if (unlikely(ret)) {
 			ret = -ENOENT;
+			/* Free the allocated page which may have
+			 * consumed a reservation.
+			 */
+			restore_reserve_on_error(h, dst_vma, dst_addr, page);
+			if (!HPageRestoreReserve(page)) {
+				if (unlikely(hugetlb_unreserve_pages(
+					    mapping->host, idx, idx + 1, 1)))
+					hugetlb_fix_reserve_counts(
+						mapping->host);
+			}
+			put_page(page);
+
+			/* Allocate a temporary page to hold the copied
+			 * contents.
+			 */
+			page = alloc_migrate_huge_page(h, gfp_mask, node,
+						       nodemask);
+			if (IS_ERR(page)) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			*pagep = page;
-			/* don't free the page */
+			/* Set the outparam pagep and return to the caller to
+			 * copy the contents outside the lock. Don't free the
+			 * page.
+			 */
 			goto out;
 		}
 	} else {
-		page = *pagep;
+		if (hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
+			put_page(*pagep);
+			ret = -EEXIST;
+			goto out;
+		}
+
+		page = alloc_huge_page(dst_vma, dst_addr, 0);
+		if (IS_ERR(page)) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		__copy_gigantic_page(page, *pagep, pages_per_huge_page(h));
+		put_page(*pagep);
 		*pagep = NULL;
 	}
 
--- a/mm/migrate.c~mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy
+++ a/mm/migrate.c
@@ -528,28 +528,6 @@ int migrate_huge_page_move_mapping(struc
 	return MIGRATEPAGE_SUCCESS;
 }
 
-/*
- * Gigantic pages are so large that we do not guarantee that page++ pointer
- * arithmetic will work across the entire page.  We need something more
- * specialized.
- */
-static void __copy_gigantic_page(struct page *dst, struct page *src,
-				int nr_pages)
-{
-	int i;
-	struct page *dst_base = dst;
-	struct page *src_base = src;
-
-	for (i = 0; i < nr_pages; ) {
-		cond_resched();
-		copy_highpage(dst, src);
-
-		i++;
-		dst = mem_map_next(dst, dst_base, i);
-		src = mem_map_next(src, src_base, i);
-	}
-}
-
 static void copy_huge_page(struct page *dst, struct page *src)
 {
 	int i;
@@ -557,19 +535,14 @@ static void copy_huge_page(struct page *
 
 	if (PageHuge(src)) {
 		/* hugetlbfs page */
-		struct hstate *h = page_hstate(src);
-		nr_pages = pages_per_huge_page(h);
-
-		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
-			__copy_gigantic_page(dst, src, nr_pages);
-			return;
-		}
-	} else {
-		/* thp page */
-		BUG_ON(!PageTransHuge(src));
-		nr_pages = thp_nr_pages(src);
+		hugetlb_copy_page(dst, src);
+		return;
 	}
 
+	/* thp page */
+	BUG_ON(!PageTransHuge(src));
+	nr_pages = thp_nr_pages(src);
+
 	for (i = 0; i < nr_pages; i++) {
 		cond_resched();
 		copy_highpage(dst + i, src + i);
_

Patches currently in -mm which might be from almasrymina@google.com are

mm-hugetlb-fix-racy-resv_huge_pages-underflow-on-uffdio_copy.patch


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [to-be-updated] mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy.patch removed from -mm tree
@ 2021-05-22 22:18 akpm
  0 siblings, 0 replies; 2+ messages in thread
From: akpm @ 2021-05-22 22:18 UTC (permalink / raw)
  To: almasrymina, axelrasmussen, mike.kravetz, mm-commits, peterx


The patch titled
     Subject: mm, hugetlb: fix resv_huge_pages underflow on UFFDIO_COPY
has been removed from the -mm tree.  Its filename was
     mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy.patch

This patch was dropped because an updated version will be merged

------------------------------------------------------
From: Mina Almasry <almasrymina@google.com>
Subject: mm, hugetlb: fix resv_huge_pages underflow on UFFDIO_COPY

The userfaultfd hugetlb tests detect a resv_huge_pages underflow. This
happens when hugetlb_mcopy_atomic_pte() is called with !is_continue on
an index for which we already have a page in the cache. When this
happens, we allocate a second page, double consuming the reservation,
and then fail to insert the page into the cache and return -EEXIST.

To fix this, we first if there exists a page in the cache which already
consumed the reservation, and return -EEXIST immediately if so.

Secondly, if we fail to copy the page contents while holding the
hugetlb_fault_mutex, we will drop the mutex and return to the caller
after allocating a page that consumed a reservation. In this case there
may be a fault that double consumes the reservation. To handle this, we
free the allocated page, fix the reservations, and allocate a temporary
hugetlb page and return that to the caller. When the caller does the
copy outside of the lock, we again check the cache, and allocate a page
consuming the reservation, and copy over the contents.

Test:
Hacked the code locally such that resv_huge_pages underflows produce
a warning and the copy_huge_page_from_user() always fails, then:

./tools/testing/selftests/vm/userfaultfd hugetlb_shared 10
	2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success
./tools/testing/selftests/vm/userfaultfd hugetlb 10
	2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success

Both tests succeed and produce no warnings. After the test runs
number of free/resv hugepages is correct.

Link: https://lkml.kernel.org/r/20210521074433.931380-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: linux-mm@kvack.org
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/hugetlb.h |    4 +
 mm/hugetlb.c            |  103 ++++++++++++++++++++++++++++++++++----
 mm/migrate.c            |   39 ++------------
 3 files changed, 103 insertions(+), 43 deletions(-)

--- a/include/linux/hugetlb.h~mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy
+++ a/include/linux/hugetlb.h
@@ -195,6 +195,8 @@ unsigned long hugetlb_change_protection(
 bool is_hugetlb_entry_migration(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 
+void hugetlb_copy_page(struct page *dst, struct page *src);
+
 #else /* !CONFIG_HUGETLB_PAGE */
 
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
@@ -385,6 +387,8 @@ static inline vm_fault_t hugetlb_fault(s
 
 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 
+static inline void hugetlb_copy_page(struct page *dst, struct page *src);
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
  * hugepages at page global directory. If arch support
--- a/mm/hugetlb.c~mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy
+++ a/mm/hugetlb.c
@@ -81,6 +81,45 @@ struct mutex *hugetlb_fault_mutex_table
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 
+/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+				 int nr_pages)
+{
+	int i;
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+
+	for (i = 0; i < nr_pages;) {
+		cond_resched();
+		copy_highpage(dst, src);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
+
+void hugetlb_copy_page(struct page *dst, struct page *src)
+{
+	int i;
+	struct hstate *h = page_hstate(src);
+	int nr_pages = pages_per_huge_page(h);
+
+	if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+		__copy_gigantic_page(dst, src, nr_pages);
+		return;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		copy_highpage(dst + i, src + i);
+	}
+}
+
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -4869,19 +4908,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
 			    struct page **pagep)
 {
 	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
-	struct address_space *mapping;
-	pgoff_t idx;
+	struct hstate *h = hstate_vma(dst_vma);
+	struct address_space *mapping = dst_vma->vm_file->f_mapping;
+	pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
 	unsigned long size;
 	int vm_shared = dst_vma->vm_flags & VM_SHARED;
-	struct hstate *h = hstate_vma(dst_vma);
 	pte_t _dst_pte;
 	spinlock_t *ptl;
-	int ret;
+	int ret = -ENOMEM;
 	struct page *page;
 	int writable;
-
-	mapping = dst_vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+	struct mempolicy *mpol;
+	nodemask_t *nodemask;
+	gfp_t gfp_mask = htlb_alloc_mask(h);
+	int node = huge_node(dst_vma, dst_addr, gfp_mask, &mpol, &nodemask);
 
 	if (is_continue) {
 		ret = -EFAULT;
@@ -4889,7 +4929,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
 		if (!page)
 			goto out;
 	} else if (!*pagep) {
-		ret = -ENOMEM;
+		/* If a page already exists, then it's UFFDIO_COPY for
+		 * a non-missing case. Return -EEXIST.
+		 */
+		if (hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
+			ret = -EEXIST;
+			goto out;
+		}
+
 		page = alloc_huge_page(dst_vma, dst_addr, 0);
 		if (IS_ERR(page))
 			goto out;
@@ -4901,12 +4948,48 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
 		/* fallback to copy_from_user outside mmap_lock */
 		if (unlikely(ret)) {
 			ret = -ENOENT;
+			/* Free the allocated page which may have
+			 * consumed a reservation.
+			 */
+			restore_reserve_on_error(h, dst_vma, dst_addr, page);
+			if (!HPageRestoreReserve(page)) {
+				if (unlikely(hugetlb_unreserve_pages(
+					    mapping->host, idx, idx + 1, 1)))
+					hugetlb_fix_reserve_counts(
+						mapping->host);
+			}
+			put_page(page);
+
+			/* Allocate a temporary page to hold the copied
+			 * contents.
+			 */
+			page = alloc_migrate_huge_page(h, gfp_mask, node,
+						       nodemask);
+			if (IS_ERR(page)) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			*pagep = page;
-			/* don't free the page */
+			/* Set the outparam pagep and return to the caller to
+			 * copy the contents outside the lock. Don't free the
+			 * page.
+			 */
 			goto out;
 		}
 	} else {
-		page = *pagep;
+		if (hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
+			put_page(*pagep);
+			ret = -EEXIST;
+			goto out;
+		}
+
+		page = alloc_huge_page(dst_vma, dst_addr, 0);
+		if (IS_ERR(page)) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		__copy_gigantic_page(page, *pagep, pages_per_huge_page(h));
+		put_page(*pagep);
 		*pagep = NULL;
 	}
 
--- a/mm/migrate.c~mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy
+++ a/mm/migrate.c
@@ -528,28 +528,6 @@ int migrate_huge_page_move_mapping(struc
 	return MIGRATEPAGE_SUCCESS;
 }
 
-/*
- * Gigantic pages are so large that we do not guarantee that page++ pointer
- * arithmetic will work across the entire page.  We need something more
- * specialized.
- */
-static void __copy_gigantic_page(struct page *dst, struct page *src,
-				int nr_pages)
-{
-	int i;
-	struct page *dst_base = dst;
-	struct page *src_base = src;
-
-	for (i = 0; i < nr_pages; ) {
-		cond_resched();
-		copy_highpage(dst, src);
-
-		i++;
-		dst = mem_map_next(dst, dst_base, i);
-		src = mem_map_next(src, src_base, i);
-	}
-}
-
 static void copy_huge_page(struct page *dst, struct page *src)
 {
 	int i;
@@ -557,19 +535,14 @@ static void copy_huge_page(struct page *
 
 	if (PageHuge(src)) {
 		/* hugetlbfs page */
-		struct hstate *h = page_hstate(src);
-		nr_pages = pages_per_huge_page(h);
-
-		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
-			__copy_gigantic_page(dst, src, nr_pages);
-			return;
-		}
-	} else {
-		/* thp page */
-		BUG_ON(!PageTransHuge(src));
-		nr_pages = thp_nr_pages(src);
+		hugetlb_copy_page(dst, src);
+		return;
 	}
 
+	/* thp page */
+	BUG_ON(!PageTransHuge(src));
+	nr_pages = thp_nr_pages(src);
+
 	for (i = 0; i < nr_pages; i++) {
 		cond_resched();
 		copy_highpage(dst + i, src + i);
_

Patches currently in -mm which might be from almasrymina@google.com are



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-06-01  0:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-01  0:32 [to-be-updated] mm-hugetlb-fix-resv_huge_pages-underflow-on-uffdio_copy.patch removed from -mm tree akpm
  -- strict thread matches above, loose matches on Subject: below --
2021-05-22 22:18 akpm

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.