linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K
@ 2021-02-02  9:11 Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 2/6] selftest/mremap_test: Avoid crash with static build Aneesh Kumar K.V
                   ` (5 more replies)
  0 siblings, 6 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02  9:11 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel, Aneesh Kumar K.V

Instead of hardcoding the 4K page size, fetch it using sysconf(). For the
performance measurements, the test still assumes 2M and 1G are hugepage sizes.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 tools/testing/selftests/vm/mremap_test.c | 113 ++++++++++++-----------
 1 file changed, 61 insertions(+), 52 deletions(-)

diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 9c391d016922..c9a5461eb786 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -45,14 +45,15 @@ enum {
 	_4MB = 4ULL << 20,
 	_1GB = 1ULL << 30,
 	_2GB = 2ULL << 30,
-	PTE = _4KB,
 	PMD = _2MB,
 	PUD = _1GB,
 };
 
+#define PTE page_size
+
 #define MAKE_TEST(source_align, destination_align, size,	\
 		  overlaps, should_fail, test_name)		\
-{								\
+(struct test){							\
 	.name = test_name,					\
 	.config = {						\
 		.src_alignment = source_align,			\
@@ -252,12 +253,17 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
 	return 0;
 }
 
+#define MAX_TEST 13
+#define MAX_PERF_TEST 3
 int main(int argc, char **argv)
 {
 	int failures = 0;
 	int i, run_perf_tests;
 	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
 	unsigned int pattern_seed;
+	struct test test_cases[MAX_TEST];
+	struct test perf_test_cases[MAX_PERF_TEST];
+	int page_size;
 	time_t t;
 
 	pattern_seed = (unsigned int) time(&t);
@@ -268,56 +274,59 @@ int main(int argc, char **argv)
 	ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
 		       threshold_mb, pattern_seed);
 
-	struct test test_cases[] = {
-		/* Expected mremap failures */
-		MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE,
-		  "mremap - Source and Destination Regions Overlapping"),
-		MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
-		  "mremap - Destination Address Misaligned (1KB-aligned)"),
-		MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
-		  "mremap - Source Address Misaligned (1KB-aligned)"),
-
-		/* Src addr PTE aligned */
-		MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "8KB mremap - Source PTE-aligned, Destination PTE-aligned"),
-
-		/* Src addr 1MB aligned */
-		MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"),
-		MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"),
-
-		/* Src addr PMD aligned */
-		MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "4MB mremap - Source PMD-aligned, Destination PTE-aligned"),
-		MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"),
-		MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "4MB mremap - Source PMD-aligned, Destination PMD-aligned"),
-
-		/* Src addr PUD aligned */
-		MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "2GB mremap - Source PUD-aligned, Destination PTE-aligned"),
-		MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"),
-		MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "2GB mremap - Source PUD-aligned, Destination PMD-aligned"),
-		MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "2GB mremap - Source PUD-aligned, Destination PUD-aligned"),
-	};
-
-	struct test perf_test_cases[] = {
-		/*
-		 * mremap 1GB region - Page table level aligned time
-		 * comparison.
-		 */
-		MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "1GB mremap - Source PTE-aligned, Destination PTE-aligned"),
-		MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "1GB mremap - Source PMD-aligned, Destination PMD-aligned"),
-		MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
-		  "1GB mremap - Source PUD-aligned, Destination PUD-aligned"),
-	};
+	page_size = sysconf(_SC_PAGESIZE);
+
+	/* Expected mremap failures */
+	test_cases[0] =	MAKE_TEST(page_size, page_size, page_size,
+				  OVERLAPPING, EXPECT_FAILURE,
+				  "mremap - Source and Destination Regions Overlapping");
+
+	test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
+				  NON_OVERLAPPING, EXPECT_FAILURE,
+				  "mremap - Destination Address Misaligned (1KB-aligned)");
+	test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
+				  NON_OVERLAPPING, EXPECT_FAILURE,
+				  "mremap - Source Address Misaligned (1KB-aligned)");
+
+	/* Src addr PTE aligned */
+	test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
+				  NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
+
+	/* Src addr 1MB aligned */
+	test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
+	test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+	/* Src addr PMD aligned */
+	test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
+	test_cases[7] =	MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
+	test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
+
+	/* Src addr PUD aligned */
+	test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
+	test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				   "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
+	test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				   "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
+	test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				   "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+	perf_test_cases[0] =  MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+					"1GB mremap - Source PTE-aligned, Destination PTE-aligned");
+	/*
+	 * mremap 1GB region - Page table level aligned time
+	 * comparison.
+	 */
+	perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				       "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
+	perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				       "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
 
 	run_perf_tests =  (threshold_mb == VALIDATION_NO_THRESHOLD) ||
 				(threshold_mb * _1MB >= _1GB);
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 2/6] selftest/mremap_test: Avoid crash with static build
  2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
@ 2021-02-02  9:11 ` Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02  9:11 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel, Aneesh Kumar K.V

With a large mmap size, we can overlap with the text area and using
MAP_FIXED results in unmapping that area. Switch to MAP_FIXED_NOREPLACE
and handle the EEXIST error.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 tools/testing/selftests/vm/mremap_test.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index c9a5461eb786..0624d1bd71b5 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -75,9 +75,10 @@ static void *get_source_mapping(struct config c)
 retry:
 	addr += c.src_alignment;
 	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
-			MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+			MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+			-1, 0);
 	if (src_addr == MAP_FAILED) {
-		if (errno == EPERM)
+		if (errno == EPERM || errno == EEXIST)
 			goto retry;
 		goto error;
 	}
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries
  2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 2/6] selftest/mremap_test: Avoid crash with static build Aneesh Kumar K.V
@ 2021-02-02  9:11 ` Aneesh Kumar K.V
  2021-02-02 10:47   ` Peter Zijlstra
  2021-02-02  9:11 ` [RFC PATCH 4/6] mm/mremap: Use mmu gather interface instead of flush_tlb_range Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02  9:11 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel, Aneesh Kumar K.V

pmd/pud_populate is the right interface to be used to set the respective
page table entries. Some architectures do assume that set_pmd/pud_at
can only be used to set a hugepage PTE. Since we are not setting up a hugepage
PTE here, use the pmd/pud_populate interface.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/mremap.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index f554320281cc..54fd2302b99d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -27,6 +27,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 #include "internal.h"
 
@@ -258,9 +259,8 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	pmd_clear(old_pmd);
 
 	VM_BUG_ON(!pmd_none(*new_pmd));
+	pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd));
 
-	/* Set the new pmd */
-	set_pmd_at(mm, new_addr, new_pmd, pmd);
 	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
@@ -307,8 +307,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 
 	VM_BUG_ON(!pud_none(*new_pud));
 
-	/* Set the new pud */
-	set_pud_at(mm, new_addr, new_pud, pud);
+	pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud));
 	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 4/6] mm/mremap: Use mmu gather interface instead of flush_tlb_range
  2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 2/6] selftest/mremap_test: Avoid crash with static build Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries Aneesh Kumar K.V
@ 2021-02-02  9:11 ` Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 5/6] mm/mremap: Allow arch runtime override Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02  9:11 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel, Aneesh Kumar K.V

Some architectures do have the concept of a page walk cache and only the mmu
gather interface supports flushing it. A fast mremap that involves moving page
table pages instead of copying pte entries should flush the page walk cache
since the old translation cache is no longer valid. Hence switch to mmu gather
to flush the TLB and mark tlb.freed_tables = 1. No page table pages need to be
freed here. With this, the tlb flush is done outside the page table lock (ptl).

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/mremap.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index 54fd2302b99d..14778d215011 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -217,6 +217,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 {
 	spinlock_t *old_ptl, *new_ptl;
 	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
 	pmd_t pmd;
 
 	/*
@@ -245,11 +246,12 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
 		return false;
 
+	tlb_gather_mmu(&tlb, mm, old_addr, PMD_SIZE);
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
 	 */
-	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	old_ptl = pmd_lock(mm, old_pmd);
 	new_ptl = pmd_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -258,13 +260,23 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	pmd = *old_pmd;
 	pmd_clear(old_pmd);
 
+	/*
+	 * Mark the range. We are not freeing page table pages nor
+	 * regular pages. Hence we don't need to call tlb_remove_table()
+	 * or tlb_remove_page().
+	 */
+	tlb_flush_pte_range(&tlb, old_addr, PMD_SIZE);
+	tlb.freed_tables = 1;
 	VM_BUG_ON(!pmd_none(*new_pmd));
 	pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd));
 
-	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
+	/*
+	 * This will invalidate both the old TLB and page table walk caches.
+	 */
+	tlb_finish_mmu(&tlb, old_addr, PMD_SIZE);
 
 	return true;
 }
@@ -283,6 +295,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 {
 	spinlock_t *old_ptl, *new_ptl;
 	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
 	pud_t pud;
 
 	/*
@@ -292,11 +305,12 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pud_none(*new_pud)))
 		return false;
 
+	tlb_gather_mmu(&tlb, mm, old_addr, PUD_SIZE);
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
 	 */
-	old_ptl = pud_lock(vma->vm_mm, old_pud);
+	old_ptl = pud_lock(mm, old_pud);
 	new_ptl = pud_lockptr(mm, new_pud);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -305,14 +319,25 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	pud = *old_pud;
 	pud_clear(old_pud);
 
+	/*
+	 * Mark the range. We are not freeing page table pages nor
+	 * regular pages. Hence we don't need to call tlb_remove_table()
+	 * or tlb_remove_page().
+	 */
+	tlb_flush_pte_range(&tlb, old_addr, PUD_SIZE);
+	tlb.freed_tables = 1;
 	VM_BUG_ON(!pud_none(*new_pud));
 
 	pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud));
-	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
+
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
+	/*
+	 * This will invalidate both the old TLB and page table walk caches.
+	 */
+	tlb_finish_mmu(&tlb, old_addr, PUD_SIZE);
 	return true;
 }
 #else
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 5/6] mm/mremap: Allow arch runtime override
  2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2021-02-02  9:11 ` [RFC PATCH 4/6] mm/mremap: Use mmu gather interface instead of flush_tlb_range Aneesh Kumar K.V
@ 2021-02-02  9:11 ` Aneesh Kumar K.V
  2021-02-02  9:11 ` [RFC PATCH 6/6] powerpc/mm: Enable move pmd/pud Aneesh Kumar K.V
  2021-02-02 13:29 ` [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Li Xinhai
  5 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02  9:11 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel, Aneesh Kumar K.V

Architectures like ppc64 can support faster mremap only with radix
translation. Hence allow a runtime check w.r.t. support for fast mremap.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/arm64/include/asm/tlb.h   |  6 ++++++
 arch/powerpc/include/asm/tlb.h |  6 ++++++
 arch/x86/include/asm/tlb.h     |  5 +++++
 mm/mremap.c                    | 14 +++++++++++++-
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 61c97d3b58c7..fe209efc6a10 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -94,4 +94,10 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 }
 #endif
 
+#define arch_supports_page_tables_move arch_supports_page_tables_move
+static inline bool arch_supports_page_tables_move(void)
+{
+	return true;
+}
+
 #endif
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 160422a439aa..058918a7cd3c 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -83,5 +83,11 @@ static inline int mm_is_thread_local(struct mm_struct *mm)
 }
 #endif
 
+#define arch_supports_page_tables_move arch_supports_page_tables_move
+static inline bool arch_supports_page_tables_move(void)
+{
+	return radix_enabled();
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_TLB_H */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 820082bd6880..62827553afd8 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -38,4 +38,9 @@ static inline void __tlb_remove_table(void *table)
 	free_page_and_swap_cache(table);
 }
 
+#define arch_supports_page_tables_move arch_supports_page_tables_move
+static inline bool arch_supports_page_tables_move(void)
+{
+	return true;
+}
 #endif /* _ASM_X86_TLB_H */
diff --git a/mm/mremap.c b/mm/mremap.c
index 14778d215011..dd1244a410bb 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -26,7 +26,7 @@
 #include <linux/userfaultfd_k.h>
 
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
 #include <asm/pgalloc.h>
 
 #include "internal.h"
@@ -211,6 +211,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+#ifndef arch_supports_page_tables_move
+#define arch_supports_page_tables_move arch_supports_page_tables_move
+static inline bool arch_supports_page_tables_move(void)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
@@ -220,6 +228,8 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	struct mmu_gather tlb;
 	pmd_t pmd;
 
+	if (!arch_supports_page_tables_move())
+		return false;
 	/*
 	 * The destination pmd shouldn't be established, free_pgtables()
 	 * should have released it.
@@ -298,6 +308,8 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	struct mmu_gather tlb;
 	pud_t pud;
 
+	if (!arch_supports_page_tables_move())
+		return false;
 	/*
 	 * The destination pud shouldn't be established, free_pgtables()
 	 * should have released it.
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 6/6] powerpc/mm: Enable move pmd/pud
  2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2021-02-02  9:11 ` [RFC PATCH 5/6] mm/mremap: Allow arch runtime override Aneesh Kumar K.V
@ 2021-02-02  9:11 ` Aneesh Kumar K.V
  2021-02-02 13:29 ` [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Li Xinhai
  5 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02  9:11 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel, Aneesh Kumar K.V

mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:
1GB mremap - Source PTE-aligned, Destination PTE-aligned
        mremap time:      1114318ns
1GB mremap - Source PMD-aligned, Destination PMD-aligned
        mremap time:      1097715ns
1GB mremap - Source PUD-aligned, Destination PUD-aligned
        mremap time:        26851ns

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/Kconfig.cputype | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 3ce907523b1e..2e666e569fdf 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -97,6 +97,8 @@ config PPC_BOOK3S_64
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_MOVE_PMD
+	select HAVE_MOVE_PUD
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select IRQ_WORK
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries
  2021-02-02  9:11 ` [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries Aneesh Kumar K.V
@ 2021-02-02 10:47   ` Peter Zijlstra
  2021-02-02 14:50     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2021-02-02 10:47 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linux-mm, akpm, mpe, linuxppc-dev, kaleshsingh, joel

On Tue, Feb 02, 2021 at 02:41:13PM +0530, Aneesh Kumar K.V wrote:
> pmd/pud_populate is the right interface to be used to set the respective
> page table entries. Some architectures do assume that set_pmd/pud_at
> can only be used to set a hugepage PTE. Since we are not setting up a hugepage
> PTE here, use the pmd/pud_populate interface.

Since you did the audit, it might be nice to record which architectures
that are. Also, how much work to fix them?


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K
  2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2021-02-02  9:11 ` [RFC PATCH 6/6] powerpc/mm: Enable move pmd/pud Aneesh Kumar K.V
@ 2021-02-02 13:29 ` Li Xinhai
  2021-02-02 14:51   ` Aneesh Kumar K.V
  5 siblings, 1 reply; 10+ messages in thread
From: Li Xinhai @ 2021-02-02 13:29 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm
  Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel

what is the overall purpose of this patch set? maybe need a cover
letter?

On 2/2/21 5:11 PM, Aneesh Kumar K.V wrote:
> Instead of hardcoding 4K page size fetch it using sysconf(). For the performance
> measurements test still assume 2M and 1G are hugepage sizes.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>   tools/testing/selftests/vm/mremap_test.c | 113 ++++++++++++-----------
>   1 file changed, 61 insertions(+), 52 deletions(-)
> 
> diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
> index 9c391d016922..c9a5461eb786 100644
> --- a/tools/testing/selftests/vm/mremap_test.c
> +++ b/tools/testing/selftests/vm/mremap_test.c
> @@ -45,14 +45,15 @@ enum {
>   	_4MB = 4ULL << 20,
>   	_1GB = 1ULL << 30,
>   	_2GB = 2ULL << 30,
> -	PTE = _4KB,
>   	PMD = _2MB,
>   	PUD = _1GB,
>   };
>   
> +#define PTE page_size
> +
>   #define MAKE_TEST(source_align, destination_align, size,	\
>   		  overlaps, should_fail, test_name)		\
> -{								\
> +(struct test){							\
>   	.name = test_name,					\
>   	.config = {						\
>   		.src_alignment = source_align,			\
> @@ -252,12 +253,17 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
>   	return 0;
>   }
>   
> +#define MAX_TEST 13
> +#define MAX_PERF_TEST 3
>   int main(int argc, char **argv)
>   {
>   	int failures = 0;
>   	int i, run_perf_tests;
>   	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
>   	unsigned int pattern_seed;
> +	struct test test_cases[MAX_TEST];
> +	struct test perf_test_cases[MAX_PERF_TEST];
> +	int page_size;
>   	time_t t;
>   
>   	pattern_seed = (unsigned int) time(&t);
> @@ -268,56 +274,59 @@ int main(int argc, char **argv)
>   	ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
>   		       threshold_mb, pattern_seed);
>   
> -	struct test test_cases[] = {
> -		/* Expected mremap failures */
> -		MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE,
> -		  "mremap - Source and Destination Regions Overlapping"),
> -		MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
> -		  "mremap - Destination Address Misaligned (1KB-aligned)"),
> -		MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
> -		  "mremap - Source Address Misaligned (1KB-aligned)"),
> -
> -		/* Src addr PTE aligned */
> -		MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "8KB mremap - Source PTE-aligned, Destination PTE-aligned"),
> -
> -		/* Src addr 1MB aligned */
> -		MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"),
> -		MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"),
> -
> -		/* Src addr PMD aligned */
> -		MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "4MB mremap - Source PMD-aligned, Destination PTE-aligned"),
> -		MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"),
> -		MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "4MB mremap - Source PMD-aligned, Destination PMD-aligned"),
> -
> -		/* Src addr PUD aligned */
> -		MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "2GB mremap - Source PUD-aligned, Destination PTE-aligned"),
> -		MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"),
> -		MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "2GB mremap - Source PUD-aligned, Destination PMD-aligned"),
> -		MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "2GB mremap - Source PUD-aligned, Destination PUD-aligned"),
> -	};
> -
> -	struct test perf_test_cases[] = {
> -		/*
> -		 * mremap 1GB region - Page table level aligned time
> -		 * comparison.
> -		 */
> -		MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "1GB mremap - Source PTE-aligned, Destination PTE-aligned"),
> -		MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "1GB mremap - Source PMD-aligned, Destination PMD-aligned"),
> -		MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> -		  "1GB mremap - Source PUD-aligned, Destination PUD-aligned"),
> -	};
> +	page_size = sysconf(_SC_PAGESIZE);
> +
> +	/* Expected mremap failures */
> +	test_cases[0] =	MAKE_TEST(page_size, page_size, page_size,
> +				  OVERLAPPING, EXPECT_FAILURE,
> +				  "mremap - Source and Destination Regions Overlapping");
> +
> +	test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
> +				  NON_OVERLAPPING, EXPECT_FAILURE,
> +				  "mremap - Destination Address Misaligned (1KB-aligned)");
> +	test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
> +				  NON_OVERLAPPING, EXPECT_FAILURE,
> +				  "mremap - Source Address Misaligned (1KB-aligned)");
> +
> +	/* Src addr PTE aligned */
> +	test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
> +				  NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
> +
> +	/* Src addr 1MB aligned */
> +	test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
> +	test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
> +
> +	/* Src addr PMD aligned */
> +	test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
> +	test_cases[7] =	MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
> +	test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
> +
> +	/* Src addr PUD aligned */
> +	test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				  "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
> +	test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				   "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
> +	test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				   "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
> +	test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				   "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
> +
> +	perf_test_cases[0] =  MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +					"1GB mremap - Source PTE-aligned, Destination PTE-aligned");
> +	/*
> +	 * mremap 1GB region - Page table level aligned time
> +	 * comparison.
> +	 */
> +	perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				       "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
> +	perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
> +				       "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
>   
>   	run_perf_tests =  (threshold_mb == VALIDATION_NO_THRESHOLD) ||
>   				(threshold_mb * _1MB >= _1GB);
> 



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries
  2021-02-02 10:47   ` Peter Zijlstra
@ 2021-02-02 14:50     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02 14:50 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, akpm, mpe, linuxppc-dev, kaleshsingh, joel

On 2/2/21 4:17 PM, Peter Zijlstra wrote:
> On Tue, Feb 02, 2021 at 02:41:13PM +0530, Aneesh Kumar K.V wrote:
>> pmd/pud_populate is the right interface to be used to set the respective
>> page table entries. Some architectures do assume that set_pmd/pud_at
>> can only be used to set a hugepage PTE. Since we are not setting up a hugepage
>> PTE here, use the pmd/pud_populate interface.
> 
> Since you did the audit, it might be nice to record which architectures
> that are. Also, how much work to fix them?
> 

I didn't audit them completely primarily because I don't follow the page 
table format of other archs. But I guess arm64, s390 appears to make 
that assumption.

Moreover set_pmd_at was added as part of THP patch series.

BTW why should we fix those architectures ? isn't pmd_populate the right 
interface for updating pmd entries other than hugepage pte ?

-aneesh


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K
  2021-02-02 13:29 ` [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Li Xinhai
@ 2021-02-02 14:51   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2021-02-02 14:51 UTC (permalink / raw)
  To: Li Xinhai, linux-mm, akpm; +Cc: mpe, linuxppc-dev, kaleshsingh, peterz, joel

On 2/2/21 6:59 PM, Li Xinhai wrote:
> what is the overall purpose of this patch set? maybe need a cover
> letter?
> 


The goal of the patch series was to enable MOVE_PMD/PUD on ppc64. But 
the series itself achieves that by fixing the existing code rather than 
adding changes to arch/ppc64. With that we could consider the series as 
a bug fix series.

-aneesh


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-02-02 14:51 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-02  9:11 [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
2021-02-02  9:11 ` [RFC PATCH 2/6] selftest/mremap_test: Avoid crash with static build Aneesh Kumar K.V
2021-02-02  9:11 ` [RFC PATCH 3/6] mm/mremap: Use pmd/pud_poplulate to update page table entries Aneesh Kumar K.V
2021-02-02 10:47   ` Peter Zijlstra
2021-02-02 14:50     ` Aneesh Kumar K.V
2021-02-02  9:11 ` [RFC PATCH 4/6] mm/mremap: Use mmu gather interface instead of flush_tlb_range Aneesh Kumar K.V
2021-02-02  9:11 ` [RFC PATCH 5/6] mm/mremap: Allow arch runtime override Aneesh Kumar K.V
2021-02-02  9:11 ` [RFC PATCH 6/6] powerpc/mm: Enable move pmd/pud Aneesh Kumar K.V
2021-02-02 13:29 ` [RFC PATCH 1/6] selftest/mremap_test: Update the test to handle pagesize other than 4K Li Xinhai
2021-02-02 14:51   ` Aneesh Kumar K.V

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).