[PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC
  To: linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm

This series speeds up the mremap(2) syscall by copying page tables at
the PMD level even for non-THP systems. There is a concern that the
extra 'address' argument that mremap passes to pte_alloc may, in the
future, do something subtle and architecture-specific that breaks the
scheme. We also find that there is no point in passing 'address' to
pte_alloc, since it is unused.
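
To illustrate why this is safe, here is a minimal sketch of a typical
implementation (it mirrors the x86 hunk below): the 'address' argument
is accepted but never read, so dropping it changes no behavior.

  /* before: 'address' is taken but ignored */
  pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
  {
          return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
  }

  /* after: the unused parameter is removed */
  pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
  {
          return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
  }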

This patch therefore removes the argument tree-wide, which also yields a
nice negative diff. Along the way, it confirms that no architecture does
anything unexpected with the 'address' argument that might otherwise go
unnoticed.

Build and boot tested on x86-64. Build tested on arm64.

The changes were obtained by applying the following Coccinelle script;
an example of the resulting transformation is shown after the script.
The pte_fragment_alloc() change was fixed up manually, since it had only
two occurrences and could not easily be generalized (thanks to Julia for
answering all my silly and not-silly Coccinelle questions!).

// Options: --include-headers --no-includes
// Note: the 'identifier fn =~ "..."' regex is wrapped across two lines
// here; if you run the script manually, rejoin it into a single line.

virtual patch

@pte_alloc_func_def depends on patch exists@
identifier E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
type T2;
@@

 fn(...
- , T2 E2
 )
 { ... }

@pte_alloc_func_proto depends on patch exists@
identifier E1, E2, E4;
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1 E1, T2 E2);
+ T3 fn(T1 E1);
|
- T3 fn(T1 E1, T2 E2, T4 E4);
+ T3 fn(T1 E1, T2 E2);
)

@pte_alloc_func_call depends on patch exists@
expression E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

 fn(...
-,  E2
 )

@pte_alloc_macro depends on patch exists@
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
identifier a, b, c;
expression e;
position p;
@@

(
- #define fn(a, b, c)@p e
+ #define fn(a, b) e
|
- #define fn(a, b)@p e
+ #define fn(a) e
)
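
Applied to a prototype and a call site, the rules above produce changes
of this shape (a condensed sketch; the full hunks are in the diff
below):

  -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
  +extern pgtable_t pte_alloc_one(struct mm_struct *mm);

  -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
  +	pgtable = pte_alloc_one(vma->vm_mm);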

Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: elfring@users.sourceforge.net
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/alpha/include/asm/pgalloc.h             |  6 +++---
 arch/arc/include/asm/pgalloc.h               |  5 ++---
 arch/arm/include/asm/pgalloc.h               |  4 ++--
 arch/arm64/include/asm/pgalloc.h             |  4 ++--
 arch/hexagon/include/asm/pgalloc.h           |  6 ++----
 arch/ia64/include/asm/pgalloc.h              |  5 ++---
 arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
 arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
 arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
 arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
 arch/microblaze/mm/pgtable.c                 |  3 +--
 arch/mips/include/asm/pgalloc.h              |  6 ++----
 arch/nds32/include/asm/pgalloc.h             |  5 ++---
 arch/nios2/include/asm/pgalloc.h             |  6 ++----
 arch/openrisc/include/asm/pgalloc.h          |  5 ++---
 arch/openrisc/mm/ioremap.c                   |  3 +--
 arch/parisc/include/asm/pgalloc.h            |  4 ++--
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
 arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  6 ++----
 arch/s390/include/asm/pgalloc.h              |  4 ++--
 arch/sh/include/asm/pgalloc.h                |  6 ++----
 arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
 arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
 arch/sparc/mm/init_64.c                      |  6 ++----
 arch/sparc/mm/srmmu.c                        |  4 ++--
 arch/um/kernel/mem.c                         |  4 ++--
 arch/unicore32/include/asm/pgalloc.h         |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
 include/linux/mm.h                           | 13 ++++++-------
 mm/huge_memory.c                             |  8 ++++----
 mm/kasan/kasan_init.c                        |  2 +-
 mm/memory.c                                  | 17 ++++++++---------
 mm/migrate.c                                 |  2 +-
 mm/mremap.c                                  |  2 +-
 mm/userfaultfd.c                             |  2 +-
 virt/kvm/arm/mmu.c                           |  2 +-
 43 files changed, 95 insertions(+), 145 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	pte_t *pte = pte_alloc_one_kernel(mm);
 	struct page *page;
 
 	if (!pte)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
 	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte_pg;
 	struct page *page;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
  *  +------------+
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 }
 
 /* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return page;
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 extern const char bad_pmd_string[];
 
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_DMA);
 
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
 
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 
 #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do {							\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_KERNEL);
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, 0);
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-		unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return ptepage;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-		unsigned long address)
-{
-	unsigned long *ret;
-
-	ret = pte_quicklist;
-	if (ret != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	}
-	return (pte_t *)ret;
-}
-
 static inline void pte_free_fast(pte_t *pte)
 {
 	*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-		unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 	if (mem_init_done) {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()		do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..24fb1021c75a 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
  * the memblock infrastructure.
  */
 
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog

+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog


* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12  1:37 ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel-team, Joel Fernandes (Google),
	Michal Hocko, Julia Lawall, elfring, Andrey Ryabinin,
	Andy Lutomirski, Borislav Petkov, Catalin Marinas, Chris Zankel,
	dancol, Dave Hansen, David S. Miller, Fenghua Yu,
	Geert Uytterhoeven, Guan Xuetao, Helge Deller, hughd,
	Ingo Molnar, James E.J. Bottomley, Jeff Dike, Jonas Bonn,
	kasan-dev, kvmarm, Ley Foon Tan, linux-alpha, linux-arm-kernel,
	linux-hexagon, linux-ia64, linux-m68k, linux-mips, linux-mm,
	linux-parisc, linuxppc-dev, linux-riscv, linux-s390, linux-sh,
	linux-snps-arc, linux-um, linux-xtensa, pantin, lokeshgidra,
	Max Filippov, minchan, nios2-dev, openrisc, Peter Zijlstra,
	Richard Weinberger, Rich Felker, Sam Creasey, sparclinux,
	Stafford Horne, Stefan Kristiansson, Thomas Gleixner, Tony Luck,
	Will Deacon, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato, kirill, akpm

This series speeds up the mremap(2) syscall by copying page tables at
the PMD level even for non-THP systems. There is a concern that the
extra 'address' argument that mremap passes to pte_alloc may do
something subtle and architecture-related in the future, which would
make the scheme not work. We also find that there is no point in
passing the 'address' to pte_alloc since it is unused.

This patch therefore removes this argument tree-wide, resulting in a
nice negative diff as well. It also ensures along the way that no
architecture does anything funky with the 'address' argument that could
go unnoticed.
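
For illustration, the conversion at each site amounts to dropping the
never-used parameter from the allocator prototypes and all of their
callers, e.g. (prototypes as they appear in the diff below):

  /* before */
  pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
  pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);

  /* after */
  pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
  pgtable_t pte_alloc_one(struct mm_struct *mm);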

Build and boot tested on x86-64. Build tested on arm64.

The changes were obtained by applying the following Coccinelle script.
The pte_fragment_alloc case was fixed up manually since it had only 2
occurrences and could not be easily generalized (and thanks Julia for
answering all my silly and not-silly Coccinelle questions!).

// Options: --include-headers --no-includes
// Note: I split the 'identifier fn' line, so if you are manually
// running it, please unsplit it so it runs for you.

virtual patch

@pte_alloc_func_def depends on patch exists@
identifier E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
type T2;
@@

 fn(...
- , T2 E2
 )
 { ... }

@pte_alloc_func_proto depends on patch exists@
identifier E1, E2, E4;
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1 E1, T2 E2);
+ T3 fn(T1 E1);
|
- T3 fn(T1 E1, T2 E2, T4 E4);
+ T3 fn(T1 E1, T2 E2);
)

@pte_alloc_func_call depends on patch exists@
expression E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

 fn(...
-,  E2
 )

@pte_alloc_macro depends on patch exists@
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
identifier a, b, c;
expression e;
position p;
@@

(
- #define fn(a, b, c)@p e
+ #define fn(a, b) e
|
- #define fn(a, b)@p e
+ #define fn(a) e
)
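
For example, with the script saved as pte_alloc.cocci (any file name
works), it can be run over the tree with:

  spatch --sp-file pte_alloc.cocci --include-headers --no-includes \
         --in-place --dir .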

Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: elfring@users.sourceforge.net
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/alpha/include/asm/pgalloc.h             |  6 +++---
 arch/arc/include/asm/pgalloc.h               |  5 ++---
 arch/arm/include/asm/pgalloc.h               |  4 ++--
 arch/arm64/include/asm/pgalloc.h             |  4 ++--
 arch/hexagon/include/asm/pgalloc.h           |  6 ++----
 arch/ia64/include/asm/pgalloc.h              |  5 ++---
 arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
 arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
 arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
 arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
 arch/microblaze/mm/pgtable.c                 |  3 +--
 arch/mips/include/asm/pgalloc.h              |  6 ++----
 arch/nds32/include/asm/pgalloc.h             |  5 ++---
 arch/nios2/include/asm/pgalloc.h             |  6 ++----
 arch/openrisc/include/asm/pgalloc.h          |  5 ++---
 arch/openrisc/mm/ioremap.c                   |  3 +--
 arch/parisc/include/asm/pgalloc.h            |  4 ++--
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
 arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  6 ++----
 arch/s390/include/asm/pgalloc.h              |  4 ++--
 arch/sh/include/asm/pgalloc.h                |  6 ++----
 arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
 arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
 arch/sparc/mm/init_64.c                      |  6 ++----
 arch/sparc/mm/srmmu.c                        |  4 ++--
 arch/um/kernel/mem.c                         |  4 ++--
 arch/unicore32/include/asm/pgalloc.h         |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
 include/linux/mm.h                           | 13 ++++++-------
 mm/huge_memory.c                             |  8 ++++----
 mm/kasan/kasan_init.c                        |  2 +-
 mm/memory.c                                  | 17 ++++++++---------
 mm/migrate.c                                 |  2 +-
 mm/mremap.c                                  |  2 +-
 mm/userfaultfd.c                             |  2 +-
 virt/kvm/arm/mmu.c                           |  2 +-
 43 files changed, 95 insertions(+), 145 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	pte_t *pte = pte_alloc_one_kernel(mm);
 	struct page *page;
 
 	if (!pte)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
 	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte_pg;
 	struct page *page;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
  *  +------------+
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 }
 
 /* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return page;
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 extern const char bad_pmd_string[];
 
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_DMA);
 
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
 
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 
 #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do {							\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_KERNEL);
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, 0);
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-		unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return ptepage;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-		unsigned long address)
-{
-	unsigned long *ret;
-
-	ret = pte_quicklist;
-	if (ret != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	}
-	return (pte_t *)ret;
-}
-
 static inline void pte_free_fast(pte_t *pte)
 {
 	*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-		unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 	if (mem_init_done) {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()		do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..24fb1021c75a 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
  * the memblock infrastructure.
  */
 
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12  1:37 ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-riscv

This series speeds up the mremap(2) syscall by copying page tables at
the PMD level even for non-THP systems. There is a concern that the
extra 'address' argument that mremap passes to pte_alloc may do
something subtle and architecture-related in the future that makes the
scheme not work. We also find that there is no point in passing
'address' to pte_alloc since it is unused.

This patch therefore removes the argument tree-wide, resulting in a nice
negative diff as well. Along the way it also ensures that no architecture
does anything funky with the 'address' argument that could go unnoticed.

Build and boot tested on x86-64. Build tested on arm64.

The changes were obtained by applying the following Coccinelle script.
pte_fragment_alloc was fixed up manually, since it had only two
occurrences and could not be easily generalized (and thanks Julia for
answering all my silly and not-silly Coccinelle questions!).

// Options: --include-headers --no-includes
// Note: I split the 'identifier fn' line, so if you are manually
// running it, please unsplit it so it runs for you.

virtual patch

@pte_alloc_func_def depends on patch exists@
identifier E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
type T2;
@@

 fn(...
- , T2 E2
 )
 { ... }

@pte_alloc_func_proto depends on patch exists@
identifier E1, E2, E4;
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1 E1, T2 E2);
+ T3 fn(T1 E1);
|
- T3 fn(T1 E1, T2 E2, T4 E4);
+ T3 fn(T1 E1, T2 E2);
)

@pte_alloc_func_call depends on patch exists@
expression E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

 fn(...
-,  E2
 )

@pte_alloc_macro depends on patch exists@
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
identifier a, b, c;
expression e;
position p;
@@

(
- #define fn(a, b, c)@p e
+ #define fn(a, b) e
|
- #define fn(a, b)@p e
+ #define fn(a) e
)
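
For reference, a typical way of applying such a script with Coccinelle
might look like the following (assuming it is saved as pte_alloc.cocci
and run from the top of the kernel tree; the file name is only a
placeholder):

  # apply the semantic patch to the whole tree, editing files in place
  spatch --sp-file pte_alloc.cocci --include-headers --no-includes \
         --in-place --dir .

The --include-headers and --no-includes flags correspond to the options
noted in the script's header comment.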

Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: elfring@users.sourceforge.net
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/alpha/include/asm/pgalloc.h             |  6 +++---
 arch/arc/include/asm/pgalloc.h               |  5 ++---
 arch/arm/include/asm/pgalloc.h               |  4 ++--
 arch/arm64/include/asm/pgalloc.h             |  4 ++--
 arch/hexagon/include/asm/pgalloc.h           |  6 ++----
 arch/ia64/include/asm/pgalloc.h              |  5 ++---
 arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
 arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
 arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
 arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
 arch/microblaze/mm/pgtable.c                 |  3 +--
 arch/mips/include/asm/pgalloc.h              |  6 ++----
 arch/nds32/include/asm/pgalloc.h             |  5 ++---
 arch/nios2/include/asm/pgalloc.h             |  6 ++----
 arch/openrisc/include/asm/pgalloc.h          |  5 ++---
 arch/openrisc/mm/ioremap.c                   |  3 +--
 arch/parisc/include/asm/pgalloc.h            |  4 ++--
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
 arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  6 ++----
 arch/s390/include/asm/pgalloc.h              |  4 ++--
 arch/sh/include/asm/pgalloc.h                |  6 ++----
 arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
 arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
 arch/sparc/mm/init_64.c                      |  6 ++----
 arch/sparc/mm/srmmu.c                        |  4 ++--
 arch/um/kernel/mem.c                         |  4 ++--
 arch/unicore32/include/asm/pgalloc.h         |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
 include/linux/mm.h                           | 13 ++++++-------
 mm/huge_memory.c                             |  8 ++++----
 mm/kasan/kasan_init.c                        |  2 +-
 mm/memory.c                                  | 17 ++++++++---------
 mm/migrate.c                                 |  2 +-
 mm/mremap.c                                  |  2 +-
 mm/userfaultfd.c                             |  2 +-
 virt/kvm/arm/mmu.c                           |  2 +-
 43 files changed, 95 insertions(+), 145 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	pte_t *pte = pte_alloc_one_kernel(mm);
 	struct page *page;
 
 	if (!pte)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
 	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte_pg;
 	struct page *page;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
  *  +------------+
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 }
 
 /* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return page;
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 extern const char bad_pmd_string[];
 
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_DMA);
 
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
 
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 
 #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do {							\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_KERNEL);
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, 0);
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-		unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return ptepage;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-		unsigned long address)
-{
-	unsigned long *ret;
-
-	ret = pte_quicklist;
-	if (ret != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	}
-	return (pte_t *)ret;
-}
-
 static inline void pte_free_fast(pte_t *pte)
 {
 	*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-		unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 	if (mem_init_done) {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()		do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..24fb1021c75a 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
  * the memblock infrastructure.
  */
 
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog

 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog



^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12  1:37 ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	Max Filippov, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

This series speeds up the mremap(2) syscall by copying page tables at the
PMD level even for non-THP systems. There is a concern that the extra
'address' argument that mremap passes to pte_alloc may do something
subtly architecture-specific in the future, which would make the scheme
not work. We also find that there is no point in passing 'address' to
pte_alloc since it is unused.

This patch therefore removes the argument tree-wide, resulting in a nice
negative diff as well. Along the way, it also ensures that no architecture
does anything funky with the 'address' argument that would go unnoticed.
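
To illustrate the shape of the conversion (this only mirrors the
per-architecture hunks below; the GFP flags shown are just one
architecture's choice), a typical implementation simply loses the unused
parameter:

  /* before */
  static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
                                            unsigned long address)
  {
          return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
  }

  /* after: 'address' was never read, so nothing else changes */
  static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
  {
          return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
  }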

Build and boot tested on x86-64. Build tested on arm64.

The changes were obtained by applying the following Coccinelle script.
The pte_fragment_alloc case was fixed up manually since it had only 2
occurrences and could not be easily generalized (and thanks Julia for
answering all my silly and not-silly Coccinelle questions!).

// Options: --include-headers --no-includes
// Note: I split the 'identifier fn' line, so if you are manually
// running it, please unsplit it so it runs for you.

virtual patch

@pte_alloc_func_def depends on patch exists@
identifier E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
type T2;
@@

 fn(...
- , T2 E2
 )
 { ... }

@pte_alloc_func_proto depends on patch exists@
identifier E1, E2, E4;
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1 E1, T2 E2);
+ T3 fn(T1 E1);
|
- T3 fn(T1 E1, T2 E2, T4 E4);
+ T3 fn(T1 E1, T2 E2);
)

@pte_alloc_func_call depends on patch exists@
expression E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

 fn(...
-,  E2
 )

@pte_alloc_macro depends on patch exists@
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
identifier a, b, c;
expression e;
position p;
@@

(
- #define fn(a, b, c)@p e
+ #define fn(a, b) e
|
- #define fn(a, b)@p e
+ #define fn(a) e
)
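
For reference, an invocation along these lines should apply the script
from the top of the kernel tree (a sketch only; the .cocci file name is a
placeholder, not the one actually used):

  spatch --sp-file remove-pte-alloc-address.cocci \
         --include-headers --no-includes --in-place --dir .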

Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: elfring@users.sourceforge.net
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/alpha/include/asm/pgalloc.h             |  6 +++---
 arch/arc/include/asm/pgalloc.h               |  5 ++---
 arch/arm/include/asm/pgalloc.h               |  4 ++--
 arch/arm64/include/asm/pgalloc.h             |  4 ++--
 arch/hexagon/include/asm/pgalloc.h           |  6 ++----
 arch/ia64/include/asm/pgalloc.h              |  5 ++---
 arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
 arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
 arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
 arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
 arch/microblaze/mm/pgtable.c                 |  3 +--
 arch/mips/include/asm/pgalloc.h              |  6 ++----
 arch/nds32/include/asm/pgalloc.h             |  5 ++---
 arch/nios2/include/asm/pgalloc.h             |  6 ++----
 arch/openrisc/include/asm/pgalloc.h          |  5 ++---
 arch/openrisc/mm/ioremap.c                   |  3 +--
 arch/parisc/include/asm/pgalloc.h            |  4 ++--
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
 arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  6 ++----
 arch/s390/include/asm/pgalloc.h              |  4 ++--
 arch/sh/include/asm/pgalloc.h                |  6 ++----
 arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
 arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
 arch/sparc/mm/init_64.c                      |  6 ++----
 arch/sparc/mm/srmmu.c                        |  4 ++--
 arch/um/kernel/mem.c                         |  4 ++--
 arch/unicore32/include/asm/pgalloc.h         |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
 include/linux/mm.h                           | 13 ++++++-------
 mm/huge_memory.c                             |  8 ++++----
 mm/kasan/kasan_init.c                        |  2 +-
 mm/memory.c                                  | 17 ++++++++---------
 mm/migrate.c                                 |  2 +-
 mm/mremap.c                                  |  2 +-
 mm/userfaultfd.c                             |  2 +-
 virt/kvm/arm/mmu.c                           |  2 +-
 43 files changed, 95 insertions(+), 145 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	pte_t *pte = pte_alloc_one_kernel(mm);
 	struct page *page;
 
 	if (!pte)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
 	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte_pg;
 	struct page *page;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
  *  +------------+
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 }
 
 /* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return page;
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 extern const char bad_pmd_string[];
 
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_DMA);
 
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
 
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 
 #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do {							\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_KERNEL);
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, 0);
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-		unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return ptepage;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-		unsigned long address)
-{
-	unsigned long *ret;
-
-	ret = pte_quicklist;
-	if (ret != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	}
-	return (pte_t *)ret;
-}
-
 static inline void pte_free_fast(pte_t *pte)
 {
 	*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-		unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 	if (mem_init_done) {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()		do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..24fb1021c75a 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
  * the memblock infrastructure.
  */
 
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog


^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12  1:37 ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-snps-arc

This series speeds up the mremap(2) syscall by copying page tables at the
PMD level even for non-THP systems. There is a concern that the extra
'address' argument that mremap passes to pte_alloc may do something
subtly architecture-specific in the future, which would make the scheme
not work. We also find that there is no point in passing 'address' to
pte_alloc since it is unused.

This patch therefore removes the argument tree-wide, resulting in a nice
negative diff as well. Along the way, it also ensures that no architecture
does anything funky with the 'address' argument that would go unnoticed.

Build and boot tested on x86-64. Build tested on arm64.

The changes were obtained by applying the following Coccinelle script.
The pte_fragment_alloc case was fixed up manually since it had only 2
occurrences and could not be easily generalized (and thanks Julia for
answering all my silly and not-silly Coccinelle questions!).

// Options: --include-headers --no-includes
// Note: I split the 'identifier fn' line, so if you are manually
// running it, please unsplit it so it runs for you.

virtual patch

@pte_alloc_func_def depends on patch exists@
identifier E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
type T2;
@@

 fn(...
- , T2 E2
 )
 { ... }

@pte_alloc_func_proto depends on patch exists@
identifier E1, E2, E4;
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1 E1, T2 E2);
+ T3 fn(T1 E1);
|
- T3 fn(T1 E1, T2 E2, T4 E4);
+ T3 fn(T1 E1, T2 E2);
)

@pte_alloc_func_call depends on patch exists@
expression E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

 fn(...
-,  E2
 )

@pte_alloc_macro depends on patch exists@
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
identifier a, b, c;
expression e;
position p;
@@

(
- #define fn(a, b, c)@p e
+ #define fn(a, b) e
|
- #define fn(a, b)@p e
+ #define fn(a) e
)

Suggested-by: Kirill A. Shutemov <kirill at shutemov.name>
Cc: Michal Hocko <mhocko at kernel.org>
Cc: Julia Lawall <Julia.Lawall at lip6.fr>
Cc: elfring at users.sourceforge.net
Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
---
 arch/alpha/include/asm/pgalloc.h             |  6 +++---
 arch/arc/include/asm/pgalloc.h               |  5 ++---
 arch/arm/include/asm/pgalloc.h               |  4 ++--
 arch/arm64/include/asm/pgalloc.h             |  4 ++--
 arch/hexagon/include/asm/pgalloc.h           |  6 ++----
 arch/ia64/include/asm/pgalloc.h              |  5 ++---
 arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
 arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
 arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
 arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
 arch/microblaze/mm/pgtable.c                 |  3 +--
 arch/mips/include/asm/pgalloc.h              |  6 ++----
 arch/nds32/include/asm/pgalloc.h             |  5 ++---
 arch/nios2/include/asm/pgalloc.h             |  6 ++----
 arch/openrisc/include/asm/pgalloc.h          |  5 ++---
 arch/openrisc/mm/ioremap.c                   |  3 +--
 arch/parisc/include/asm/pgalloc.h            |  4 ++--
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
 arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  6 ++----
 arch/s390/include/asm/pgalloc.h              |  4 ++--
 arch/sh/include/asm/pgalloc.h                |  6 ++----
 arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
 arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
 arch/sparc/mm/init_64.c                      |  6 ++----
 arch/sparc/mm/srmmu.c                        |  4 ++--
 arch/um/kernel/mem.c                         |  4 ++--
 arch/unicore32/include/asm/pgalloc.h         |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
 include/linux/mm.h                           | 13 ++++++-------
 mm/huge_memory.c                             |  8 ++++----
 mm/kasan/kasan_init.c                        |  2 +-
 mm/memory.c                                  | 17 ++++++++---------
 mm/migrate.c                                 |  2 +-
 mm/mremap.c                                  |  2 +-
 mm/userfaultfd.c                             |  2 +-
 virt/kvm/arm/mmu.c                           |  2 +-
 43 files changed, 95 insertions(+), 145 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	pte_t *pte = pte_alloc_one_kernel(mm);
 	struct page *page;
 
 	if (!pte)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
 	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte_pg;
 	struct page *page;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
  *  +------------+
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 }
 
 /* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return page;
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 extern const char bad_pmd_string[];
 
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_DMA);
 
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
 
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 
 #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do {							\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_KERNEL);
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, 0);
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-		unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return ptepage;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-		unsigned long address)
-{
-	unsigned long *ret;
-
-	ret = pte_quicklist;
-	if (ret != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	}
-	return (pte_t *)ret;
-}
-
 static inline void pte_free_fast(pte_t *pte)
 {
 	*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-		unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 	if (mem_init_done) {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()		do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..24fb1021c75a 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
  * the memblock infrastructure.
  */
 
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12  1:37 ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12  1:37 UTC (permalink / raw)
  To: openrisc

This series speeds up the mremap(2) syscall by copying page tables at
the PMD level even for non-THP systems. There is a concern that the
extra 'address' argument that mremap passes to pte_alloc may do
something subtle and architecture-related in the future that makes the
scheme not work. Also, we find that there is no point in passing the
'address' to pte_alloc since it's unused.

This patch therefore removes this argument tree-wide, resulting in a nice
negative diff as well. It also ensures along the way that no architecture
does anything funky with the 'address' argument that would go unnoticed.

Build and boot tested on x86-64. Build tested on arm64.

The changes were obtained by applying the following Coccinelle script.
pte_fragment_alloc was fixed up manually since it had only 2 occurrences
and could not be easily generalized (and thanks, Julia, for answering all
my silly and not-silly Coccinelle questions!).

// Options: --include-headers --no-includes
// Note: I split the 'identifier fn' line, so if you are manually
// running it, please unsplit it so it runs for you.

virtual patch

@pte_alloc_func_def depends on patch exists@
identifier E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
type T2;
@@

 fn(...
- , T2 E2
 )
 { ... }

@pte_alloc_func_proto depends on patch exists@
identifier E1, E2, E4;
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1 E1, T2 E2);
+ T3 fn(T1 E1);
|
- T3 fn(T1 E1, T2 E2, T4 E4);
+ T3 fn(T1 E1, T2 E2);
)

@pte_alloc_func_call depends on patch exists@
expression E2;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

 fn(...
-,  E2
 )

@pte_alloc_macro depends on patch exists@
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
identifier a, b, c;
expression e;
position p;
@@

(
- #define fn(a, b, c)@p e
+ #define fn(a, b) e
|
- #define fn(a, b)@p e
+ #define fn(a) e
)

Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: elfring@users.sourceforge.net
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/alpha/include/asm/pgalloc.h             |  6 +++---
 arch/arc/include/asm/pgalloc.h               |  5 ++---
 arch/arm/include/asm/pgalloc.h               |  4 ++--
 arch/arm64/include/asm/pgalloc.h             |  4 ++--
 arch/hexagon/include/asm/pgalloc.h           |  6 ++----
 arch/ia64/include/asm/pgalloc.h              |  5 ++---
 arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
 arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
 arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
 arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
 arch/microblaze/mm/pgtable.c                 |  3 +--
 arch/mips/include/asm/pgalloc.h              |  6 ++----
 arch/nds32/include/asm/pgalloc.h             |  5 ++---
 arch/nios2/include/asm/pgalloc.h             |  6 ++----
 arch/openrisc/include/asm/pgalloc.h          |  5 ++---
 arch/openrisc/mm/ioremap.c                   |  3 +--
 arch/parisc/include/asm/pgalloc.h            |  4 ++--
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
 arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  6 ++----
 arch/s390/include/asm/pgalloc.h              |  4 ++--
 arch/sh/include/asm/pgalloc.h                |  6 ++----
 arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
 arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
 arch/sparc/mm/init_64.c                      |  6 ++----
 arch/sparc/mm/srmmu.c                        |  4 ++--
 arch/um/kernel/mem.c                         |  4 ++--
 arch/unicore32/include/asm/pgalloc.h         |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
 include/linux/mm.h                           | 13 ++++++-------
 mm/huge_memory.c                             |  8 ++++----
 mm/kasan/kasan_init.c                        |  2 +-
 mm/memory.c                                  | 17 ++++++++---------
 mm/migrate.c                                 |  2 +-
 mm/mremap.c                                  |  2 +-
 mm/userfaultfd.c                             |  2 +-
 virt/kvm/arm/mmu.c                           |  2 +-
 43 files changed, 95 insertions(+), 145 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	pte_t *pte = pte_alloc_one_kernel(mm);
 	struct page *page;
 
 	if (!pte)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
 	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte_pg;
 	struct page *page;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
  *  +------------+
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 }
 
 /* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return page;
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 extern const char bad_pmd_string[];
 
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_DMA);
 
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
 
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 
 #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do {							\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	unsigned long page = __get_free_page(GFP_KERNEL);
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, 0);
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-		unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return ptepage;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-		unsigned long address)
-{
-	unsigned long *ret;
-
-	ret = pte_quicklist;
-	if (ret != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	}
-	return (pte_t *)ret;
-}
-
 static inline void pte_free_fast(pte_t *pte)
 {
 	*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-		unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 	if (mem_init_done) {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()		do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..24fb1021c75a 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
  * the memblock infrastructure.
  */
 
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 }
 
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cff1d426ca6a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..b99a89cdcc5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
 extern char etext[], _stext[], _sinittext[], _einittext[];
 
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *ptepage;
 
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-	unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(
 		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-	unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..ce2ca8cbd2ec 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Allocate and free page tables.
  */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
 	void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(pgtable_cache, pmd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f396048a0d68..6133f21811e9 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-			    unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pte_t *pte = NULL;
@@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm,
-			unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..ce67a96e70c3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
  * Alignments up to the page size are the same for physical and virtual
  * addresses of the nocache area.
  */
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	unsigned long pte;
 	struct page *page;
 
-	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470ea646..1f277191fbf3 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * Allocate one PTE table.
  */
 static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *pte;
 
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 }
 
 static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fbd578daa66e..5068e85165b2 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..a2eff247377b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *pte;
 
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					 unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
 	int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return ptep;
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm, addr);
+	pte = pte_alloc_one_kernel(mm);
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..89c2b1739a69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
 static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
 
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc(mm, pmd, address)			\
-	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
 
 #define pte_alloc_map(mm, pmd, address)			\
-	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	(pte_alloc(mm, pmd, address) ?			\
+	(pte_alloc(mm, pmd) ?			\
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..fd7e8714e5a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(vma->vm_mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
@@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		struct page *zero_page;
 		bool set;
 		vm_fault_t ret;
-		pgtable = pte_alloc_one(vma->vm_mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return VM_FAULT_SIGBUS;
 
 	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm, addr);
+		pgtable = pte_alloc_one(vma->vm_mm);
 		if (!pgtable)
 			return VM_FAULT_OOM;
 	}
@@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!vma_is_anonymous(vma))
 		return 0;
 
-	pgtable = pte_alloc_one(dst_mm, addr);
+	pgtable = pte_alloc_one(dst_mm);
 	if (unlikely(!pgtable))
 		goto out;
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..272849cd2007 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			pte_t *p;
 
 			if (slab_is_available())
-				p = pte_alloc_one_kernel(&init_mm, addr);
+				p = pte_alloc_one_kernel(&init_mm);
 			else
 				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
 			if (!p)
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..3afdcf38993d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	pgtable_t new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
 		return -ENOMEM;
 
@@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
@@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	 * related to pte entry. Use the preallocated table for that.
 	 */
 	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
@@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			start_pgoff + nr_pages - 1);
 
 	if (pmd_none(*vmf->pmd)) {
-		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-						  vmf->address);
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..3080b0626026 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(mm, pmdp, addr))
+	if (pte_alloc(mm, pmdp))
 		goto abort;
 
 	/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..9e68a02a52b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			if (pmd_trans_unstable(old_pmd))
 				continue;
 		}
-		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 			break;
 		}
 		if (unlikely(pmd_none(dst_pmdval)) &&
-		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 			err = -ENOMEM;
 			break;
 		}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..3f8180414301 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
-- 
2.19.0.605.g01d371f741-goog


^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12  1:37 ` Joel Fernandes (Google)
                     ` (6 preceding siblings ...)
  (?)
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm

Android needs to mremap large regions of memory during memory-management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which copies one pte at
a time and can be very slow across a large map. Turning on THP may not
be a viable option, and is not for us. This patch speeds up mremap on
non-THP systems by copying at the PMD level when possible.
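
(For scale: with 4 KiB pages and 2 MiB PMDs, as in the usual x86-64 and
arm64 4K configurations, a 1 GiB region spans 262,144 PTEs but only 512
PMD entries, so each aligned PMD_SIZE extent that can be moved wholesale
avoids 512 individual pte copies.)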

The speedup is roughly three orders of magnitude: on a 1GB mremap, the
completion time drops from 160-250 milliseconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

In case THP is enabled, the optimization is skipped. I also flush the
TLB every time we do this optimization since I couldn't find a way to
determine whether the low-level PTEs are dirty. The cost of doing so is
small compared to the improvement, on both x86-64 and arm64.
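
For reference, the "Total mremap time" figures above can be reproduced
with a small user-space harness along the lines below. This is only a
sketch, not the exact benchmark behind the quoted numbers; the 1 GiB
size, the page-touch loop, and the MREMAP_FIXED destination are
illustrative assumptions:

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <sys/mman.h>

#define REGION_SZ (1UL << 30)	/* 1 GiB */

int main(void)
{
	struct timespec t0, t1;
	char *src, *dst, *moved;
	unsigned long i;

	src = mmap(NULL, REGION_SZ, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED)
		return 1;

	/* Fault in every page so there are real page tables to move. */
	for (i = 0; i < REGION_SZ; i += 4096)
		src[i] = 1;

	/* Reserve a destination so MREMAP_FIXED forces an actual move. */
	dst = mmap(NULL, REGION_SZ, PROT_NONE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED)
		return 1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	moved = mremap(src, REGION_SZ, REGION_SZ,
		       MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	if (moved == MAP_FAILED)
		return 1;

	printf("Total mremap time for 1GB data: %lld nanoseconds.\n",
	       (long long)(t1.tv_sec - t0.tv_sec) * 1000000000LL +
	       (t1.tv_nsec - t0.tv_nsec));
	return 0;
}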

Cc: minchan@kernel.org
Cc: pantin@google.com
Cc: hughd@google.com
Cc: lokeshgidra@google.com
Cc: dancol@google.com
Cc: mhocko@kernel.org
Cc: kirill@shutemov.name
Cc: akpm@linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have released it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan@kernel.org
Cc: pantin@google.com
Cc: hughd@google.com
Cc: lokeshgidra@google.com
Cc: dancol@google.com
Cc: mhocko@kernel.org
Cc: kirill@shutemov.name
Cc: akpm@linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel-team, Joel Fernandes (Google),
	minchan, pantin, hughd, lokeshgidra, dancol, mhocko, kirill,
	akpm, Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Dave Hansen, David S. Miller,
	elfring, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Ingo Molnar, James E.J. Bottomley, Jeff Dike,
	Jonas Bonn, Julia Lawall, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	linux-xtensa, Max Filippov, nios2-dev, openrisc, Peter Zijlstra,
	Richard Weinberger, Rich Felker, Sam Creasey, sparclinux,
	Stafford Horne, Stefan Kristiansson, Thomas Gleixner, Tony Luck,
	Will Deacon, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan@kernel.org
Cc: pantin@google.com
Cc: hughd@google.com
Cc: lokeshgidra@google.com
Cc: dancol@google.com
Cc: mhocko@kernel.org
Cc: kirill@shutemov.name
Cc: akpm@linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel-team, Joel Fernandes (Google),
	minchan, pantin, hughd, lokeshgidra, dancol, mhocko, kirill,
	akpm, Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Dave Hansen, David S. Miller,
	elfring, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Ingo Molnar, James E.J. Bottomley, Jeff Dike,
	Jonas Bonn, Julia Lawall, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	linux-xtensa, Max Filippov, nios2-dev, openrisc, Peter Zijlstra,
	Richard Weinberger, Rich Felker, Sam Creasey, sparclinux,
	Stafford Horne, Stefan Kristiansson, Thomas Gleixner, Tony Luck,
	Will Deacon, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	Yoshinori Sato

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan@kernel.org
Cc: pantin@google.com
Cc: hughd@google.com
Cc: lokeshgidra@google.com
Cc: dancol@google.com
Cc: mhocko@kernel.org
Cc: kirill@shutemov.name
Cc: akpm@linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-riscv

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan at kernel.org
Cc: pantin at google.com
Cc: hughd at google.com
Cc: lokeshgidra at google.com
Cc: dancol at google.com
Cc: mhocko at kernel.org
Cc: kirill at shutemov.name
Cc: akpm at linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	Max Filippov, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan@kernel.org
Cc: pantin@google.com
Cc: hughd@google.com
Cc: lokeshgidra@google.com
Cc: dancol@google.com
Cc: mhocko@kernel.org
Cc: kirill@shutemov.name
Cc: akpm@linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	Max Filippov, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan@kernel.org
Cc: pantin@google.com
Cc: hughd@google.com
Cc: lokeshgidra@google.com
Cc: dancol@google.com
Cc: mhocko@kernel.org
Cc: kirill@shutemov.name
Cc: akpm@linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog


^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes (Google) @ 2018-10-12  1:37 UTC (permalink / raw)
  To: linux-snps-arc

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan at kernel.org
Cc: pantin at google.com
Cc: hughd at google.com
Cc: lokeshgidra at google.com
Cc: dancol at google.com
Cc: mhocko at kernel.org
Cc: kirill at shutemov.name
Cc: akpm at linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have release it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12  1:37   ` Joel Fernandes (Google)
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12  1:37 UTC (permalink / raw)
  To: openrisc

Android needs to mremap large regions of memory during memory management
related operations. The mremap system call can be really slow if THP is
not enabled. The bottleneck is move_page_tables, which is copying each
pte at a time, and can be really slow across a large map. Turning on THP
may not be a viable option, and is not for us. This patch speeds up the
performance for non-THP system by copying at the PMD level when possible.

The speed up is three orders of magnitude. On a 1GB mremap, the mremap
completion times drops from 160-250 millesconds to 380-400 microseconds.

Before:
Total mremap time for 1GB data: 242321014 nanoseconds.
Total mremap time for 1GB data: 196842467 nanoseconds.
Total mremap time for 1GB data: 167051162 nanoseconds.

After:
Total mremap time for 1GB data: 385781 nanoseconds.
Total mremap time for 1GB data: 388959 nanoseconds.
Total mremap time for 1GB data: 402813 nanoseconds.

Incase THP is enabled, the optimization is skipped. I also flush the
tlb every time we do this optimization since I couldn't find a way to
determine if the low-level PTEs are dirty. It is seen that the cost of
doing so is not much compared the improvement, on both x86-64 and arm64.

Cc: minchan at kernel.org
Cc: pantin at google.com
Cc: hughd at google.com
Cc: lokeshgidra at google.com
Cc: dancol at google.com
Cc: mhocko at kernel.org
Cc: kirill at shutemov.name
Cc: akpm at linux-foundation.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/mm/mremap.c b/mm/mremap.c
index 9e68a02a52b1..d82c485822ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+		  unsigned long new_addr, unsigned long old_end,
+		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+{
+	spinlock_t *old_ptl, *new_ptl;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+	    || old_end - old_addr < PMD_SIZE)
+		return false;
+
+	/*
+	 * The destination pmd shouldn't be established, free_pgtables()
+	 * should have released it.
+	 */
+	if (WARN_ON(!pmd_none(*new_pmd)))
+		return false;
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+	if (old_ptl) {
+		pmd_t pmd;
+
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		/* Clear the pmd */
+		pmd = *old_pmd;
+		pmd_clear(old_pmd);
+
+		VM_BUG_ON(!pmd_none(*new_pmd));
+
+		/* Set the new pmd */
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
+
+		*need_flush = true;
+		return true;
+	}
+	return false;
+}
+
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
@@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			split_huge_pmd(vma, old_pmd, old_addr);
 			if (pmd_trans_unstable(old_pmd))
 				continue;
+		} else if (extent == PMD_SIZE) {
+			bool moved;
+
+			/* See comment in move_ptes() */
+			if (need_rmap_locks)
+				take_rmap_locks(vma);
+			moved = move_normal_pmd(vma, old_addr, new_addr,
+					old_end, old_pmd, new_pmd,
+					&need_flush);
+			if (need_rmap_locks)
+				drop_rmap_locks(vma);
+			if (moved)
+				continue;
 		}
+
 		if (pte_alloc(new_vma->vm_mm, new_pmd))
 			break;
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
-- 
2.19.0.605.g01d371f741-goog


^ permalink raw reply related	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12  1:37   ` Joel Fernandes (Google)
                     ` (7 preceding siblings ...)
  (?)
@ 2018-10-12  6:40   ` Anton Ivanov
  -1 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12  6:40 UTC (permalink / raw)
  To: linux-um

On 12/10/2018 02:37, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of

That is a fairly high-cost operation on UML. In fact, I suspect it
results in zero gain, or even worse.

I will try to find some time to look into this one today or over the 
weekend.

A.

> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;



_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12  1:37 ` Joel Fernandes (Google)
                     ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 11:09   ` Kirill A. Shutemov
  -1 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:09 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  
>  extern const char bad_pmd_string[];
>  
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>  {
>  	unsigned long page = __get_free_page(GFP_DMA);
>  
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>  
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -

I believe this one was done manually, right?
Please explicitly state everything you did on top of the semantic patch.

...

> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>  #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>  #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>  
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>  {
>  	struct page *ptepage;
>  
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>  	return ptepage;
>  }
>  
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -

Ditto.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12  1:37   ` Joel Fernandes (Google)
                       ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 11:30     ` Kirill A. Shutemov
  -1 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:30 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
> 
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
> 
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
> 
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
> 
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.

I looked into the code more and noticed the move_pte() helper called from
move_ptes(). It changes the PTE entry to suit the new address.

It is only defined in a non-trivial way on Sparc. I don't know much about
Sparc and it's hard for me to say if the optimization will break anything
there.

I think it is worth disabling the optimization if __HAVE_ARCH_MOVE_PTE is
defined, or making architectures state explicitly that the optimization is
safe.
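
The first option could look roughly like this on top of the hunk quoted
below (just a sketch, untested):

		} else if (extent == PMD_SIZE) {
#ifdef __HAVE_ARCH_MOVE_PTE
			/*
			 * The architecture has a non-trivial move_pte(),
			 * so fall through to move_ptes(), which applies
			 * it to each entry.
			 */
#else
			bool moved;

			/* See comment in move_ptes() */
			if (need_rmap_locks)
				take_rmap_locks(vma);
			moved = move_normal_pmd(vma, old_addr, new_addr,
					old_end, old_pmd, new_pmd,
					&need_flush);
			if (need_rmap_locks)
				drop_rmap_locks(vma);
			if (moved)
				continue;
#endif
		}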

> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>  			split_huge_pmd(vma, old_pmd, old_addr);
>  			if (pmd_trans_unstable(old_pmd))
>  				continue;
> +		} else if (extent == PMD_SIZE) {

Hm. What guarantees that new_addr is PMD_SIZE-aligned?
It's not obvious to me.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 11:30     ` Kirill A. Shutemov
                         ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 11:36       ` Kirill A. Shutemov
  -1 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:36 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> 
> Hm. What guarantees that new_addr is PMD_SIZE-aligned?
> It's not obvious to me.

Ignore this :)

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 11:36       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:36 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov,
	linux-kernel, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> 
> Hm. What guarantees that new_addr is PMD_SIZE-aligned?
> It's not obvious to me.

Ignore this :)

-- 
 Kirill A. Shutemov

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 11:36       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:36 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov,
	linux-kernel, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> 
> Hm. What guarantees that new_addr is PMD_SIZE-aligned?
> It's not obvious to me.

Ignore this :)

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 11:36       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:36 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018@02:30:56PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018@06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> 
> Hm. What guarantees that new_addr is PMD_SIZE-aligned?
> It's not obvious to me.

Ignore this :)

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 11:36       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 11:36 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> 
> Hm. What guarantees that new_addr is PMD_SIZE-aligned?
> It's not obvious to me.

Ignore this :)

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 11:30     ` Kirill A. Shutemov
@ 2018-10-12 12:50       ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 12:50 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > Incase THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared the improvement, on both x86-64 and arm64.
> 
> I looked into the code more and noticed move_pte() helper called from
> move_ptes(). It changes PTE entry to suite new address.
> 
> It is only defined in non-trivial way on Sparc. I don't know much about
> Sparc and it's hard for me to say if the optimization will break anything
> there.

Sparc's move_pte seems to be flushing the D-cache to prevent aliasing. It is
not modifying the PTE itself AFAICS:

#ifdef DCACHE_ALIASING_POSSIBLE
#define __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)                         \
({                                                                      \
        pte_t newpte = (pte);                                           \
        if (tlb_type != hypervisor && pte_present(pte)) {               \
                unsigned long this_pfn = pte_pfn(pte);                  \
                                                                        \
                if (pfn_valid(this_pfn) &&                              \
                    (((old_addr) ^ (new_addr)) & (1 << 13)))            \
                        flush_dcache_page_all(current->mm,              \
                                              pfn_to_page(this_pfn));   \
        }                                                               \
        newpte;                                                         \
})
#endif

If it's an issue, then how do transparent huge pages work on Sparc? I don't
see the huge page code (move_huge_pages) during mremap doing anything special
for the Sparc architecture when moving PMDs.

Also, do we not flush the caches from any path when we munmap an address range?
We do call do_munmap on the old mapping from mremap after moving to the new one.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 12:50       ` Joel Fernandes
@ 2018-10-12 13:19         ` Kirill A. Shutemov
  -1 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 13:19 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 05:50:46AM -0700, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> > On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > > Android needs to mremap large regions of memory during memory management
> > > related operations. The mremap system call can be really slow if THP is
> > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > pte at a time, and can be really slow across a large map. Turning on THP
> > > may not be a viable option, and is not for us. This patch speeds up the
> > > performance for non-THP system by copying at the PMD level when possible.
> > > 
> > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > > 
> > > Before:
> > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > 
> > > After:
> > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > 
> > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > tlb every time we do this optimization since I couldn't find a way to
> > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > doing so is not much compared the improvement, on both x86-64 and arm64.
> > 
> > I looked into the code more and noticed move_pte() helper called from
> > move_ptes(). It changes PTE entry to suite new address.
> > 
> > It is only defined in non-trivial way on Sparc. I don't know much about
> > Sparc and it's hard for me to say if the optimization will break anything
> > there.
> 
> Sparc's move_pte seems to be flushing the D-cache to prevent aliasing. It is
> not modifying the PTE itself AFAICS:
> 
> #ifdef DCACHE_ALIASING_POSSIBLE
> #define __HAVE_ARCH_MOVE_PTE
> #define move_pte(pte, prot, old_addr, new_addr)                         \
> ({                                                                      \
>         pte_t newpte = (pte);                                           \
>         if (tlb_type != hypervisor && pte_present(pte)) {               \
>                 unsigned long this_pfn = pte_pfn(pte);                  \
>                                                                         \
>                 if (pfn_valid(this_pfn) &&                              \
>                     (((old_addr) ^ (new_addr)) & (1 << 13)))            \
>                         flush_dcache_page_all(current->mm,              \
>                                               pfn_to_page(this_pfn));   \
>         }                                                               \
>         newpte;                                                         \
> })
> #endif
> 
> If its an issue, then how do transparent huge pages work on Sparc?  I don't
> see the huge page code (move_huge_pages) during mremap doing anything special
> for Sparc architecture when moving PMDs..

My *guess* is that it will work fine on Sparc, as apparently it only cares
about a change in bit 13 of the virtual address. That will never happen for
huge pages or when PTE page tables move.
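
To make the bit-13 point concrete: for a whole-PMD move, the old and the new
user address of any given page keep the same offset within the PMD, so they
can only differ in bits at or above PMD_SHIFT, which is well above 13 here.
A hypothetical helper, for illustration only:

/*
 * Same test as in sparc64's move_pte().  With old_addr and new_addr both
 * equal to a PMD_SIZE-aligned base plus the same in-PMD offset, the XOR
 * below has no bits set under PMD_SHIFT, so this can never return true
 * for pages moved as part of a whole-PMD move.
 */
static inline bool dcache_colour_differs(unsigned long old_addr,
					 unsigned long new_addr)
{
	return ((old_addr ^ new_addr) & (1UL << 13)) != 0;
}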

But I just realized that the problem is bigger: since we pass new_addr to
set_pte_at(), we would need to audit all implementations to make sure they are
safe with just moving the PTE page table.
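
The loop in question, abbreviated from mm/mremap.c of this era (soft-dirty and
TLB-flush bookkeeping elided):

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		/* Clear the old entry, translate it, install it at new_addr. */
		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		set_pte_at(mm, new_addr, new_pte, pte);
	}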

I would rather go with per-architecture enabling. It's much safer.
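
A minimal sketch of such an opt-in; CONFIG_HAVE_MOVE_PMD is a name made up
here for illustration, to be selected only by architectures that have audited
their set_pte_at() and move_pte():

#ifdef CONFIG_HAVE_MOVE_PMD
		} else if (extent == PMD_SIZE &&
			   IS_ALIGNED(old_addr, PMD_SIZE) &&
			   IS_ALIGNED(new_addr, PMD_SIZE)) {
			/*
			 * The architecture opted in: move the whole PTE page
			 * table instead of copying entries one at a time.
			 */
			if (move_normal_pmd(vma, old_addr, new_addr,
					    old_pmd, new_pmd))
				continue;
#endif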

> Also, do we not flush the caches from any path when we munmap address space?
> We do call do_munmap on the old mapping from mremap after moving to the new one.

Are you sure about that? It can be hidden deeper in architecture-specific
code.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 13:19         ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 13:19 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov,
	linux-kernel, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 05:50:46AM -0700, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> > On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > > Android needs to mremap large regions of memory during memory management
> > > related operations. The mremap system call can be really slow if THP is
> > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > pte at a time, and can be really slow across a large map. Turning on THP
> > > may not be a viable option, and is not for us. This patch speeds up the
> > > performance for non-THP systems by copying at the PMD level when possible.
> > > 
> > > The speedup is three orders of magnitude. On a 1GB mremap, the mremap
> > > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > > 
> > > Before:
> > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > 
> > > After:
> > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > 
> > > In case THP is enabled, the optimization is skipped. I also flush the
> > > TLB every time we do this optimization since I couldn't find a way to
> > > determine if the low-level PTEs are dirty. The cost of doing so is
> > > small compared to the improvement, on both x86-64 and arm64.
> > 
> > I looked into the code more and noticed the move_pte() helper called from
> > move_ptes(). It changes the PTE entry to suit the new address.
> > 
> > It is only defined in a non-trivial way on Sparc. I don't know much about
> > Sparc, and it's hard for me to say if the optimization will break anything
> > there.
> 
> Sparc's move_pte seems to be flushing the D-cache to prevent aliasing. It is
> not modifying the PTE itself AFAICS:
> 
> #ifdef DCACHE_ALIASING_POSSIBLE
> #define __HAVE_ARCH_MOVE_PTE
> #define move_pte(pte, prot, old_addr, new_addr)                         \
> ({                                                                      \
>         pte_t newpte = (pte);                                           \
>         if (tlb_type != hypervisor && pte_present(pte)) {               \
>                 unsigned long this_pfn = pte_pfn(pte);                  \
>                                                                         \
>                 if (pfn_valid(this_pfn) &&                              \
>                     (((old_addr) ^ (new_addr)) & (1 << 13)))            \
>                         flush_dcache_page_all(current->mm,              \
>                                               pfn_to_page(this_pfn));   \
>         }                                                               \
>         newpte;                                                         \
> })
> #endif
> 
> If it's an issue, then how do transparent huge pages work on Sparc? I don't
> see the huge page code (move_huge_pages) doing anything special for the
> Sparc architecture when moving PMDs during mremap.

My *guess* is that it will work fine on Sparc, as it apparently only cares
about a change in bit 13 of the virtual address. That will never happen for
huge pages or when PTE page tables move.

But I just realized that the problem is bigger: since we pass new_addr to
set_pte_at(), we would need to audit all implementations to confirm that they
are safe with just moving the PTE page table.
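
For reference, a trimmed sketch of the per-PTE path being discussed (a
simplification of move_ptes() in mm/mremap.c; locking, soft-dirty and TLB
handling are left out):

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		pte_t pte;

		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/* arch hook; only Sparc defines this non-trivially */
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		/* new_addr is what each architecture's set_pte_at() sees */
		set_pte_at(mm, new_addr, new_pte, pte);
	}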

I would rather go with per-architecture enabling. It's much safer.
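
One possible shape for such per-architecture opt-in, sketched against the new
PMD-level path in move_page_tables() (the Kconfig symbol name and the argument
list here are illustrative, not taken from the posted series):

	if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && extent == PMD_SIZE &&
	    !pmd_trans_huge(*old_pmd) && pmd_none(*new_pmd)) {
		/*
		 * Only architectures that selected HAVE_MOVE_PMD after
		 * auditing their set_pte_at()/cache-flush behaviour take
		 * the PMD-level shortcut; everyone else falls back to
		 * the per-PTE copy in move_ptes().
		 */
		if (move_normal_pmd(vma, old_addr, new_addr,
				    old_pmd, new_pmd))
			continue;
	}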

> Also, do we not flush the caches from any path when we munmap an address space?
> We do call do_munmap on the old mapping from mremap after moving to the new one.

Are you sure about that? It can be hidden deeper in architecture-specific
code.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12  1:37 ` Joel Fernandes (Google)
                     ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 13:56   ` Anton Ivanov
  -1 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since its unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h

This patch missed it.
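
A follow-up hunk would presumably look something like the sketch below. This
is a hypothetical illustration based on the pte_alloc_one() definition changed
in arch/um/kernel/mem.c further down, not the actual contents of that header:

	--- a/arch/um/include/asm/pgalloc.h
	+++ b/arch/um/include/asm/pgalloc.h
	-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
	+extern pgtable_t pte_alloc_one(struct mm_struct *);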

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 13:56   ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since it's unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurrences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h

This patch missed it.
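
For reference, the missed declaration would need the same one-line treatment. A minimal sketch of such a follow-up hunk (illustrative only: the exact prototype carried by arch/um/include/asm/pgalloc.h is assumed here, not quoted from the tree or from the posted patch):

--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ (hunk position omitted; prototype form assumed for illustration) @@
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *);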

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 13:56   ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: linux-riscv


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since it's unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurrences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h.

This patch missed it.

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 13:56   ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, linux-parisc, pantin, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since its unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h

This patch missed it.
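
For reference, the fixup needed on top of this patch would presumably be along these lines (sketch only, based on what that header currently looks like; the pte_alloc_one_kernel() prototype next to it most likely wants the same treatment):

--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ ... @@
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);

Otherwise the extern prototypes in the header no longer match the definitions this patch changes in arch/um/kernel/mem.c.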

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 13:56   ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, linux-parisc, pantin, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since its unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h

This patch missed it.
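
For illustration, a minimal sketch of the kind of extra hunk that would presumably be needed there (assuming the um header still carries the two-argument prototypes; the exact lines and whether both variants are declared there are approximations, not taken from this patch):

--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);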

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 13:56   ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: linux-snps-arc


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since its unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h

This patch missed it.

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;


* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 13:56   ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 13:56 UTC (permalink / raw)
  To: openrisc


On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> This series speeds up mremap(2) syscall by copying page tables at the
> PMD level even for non-THP systems. There is concern that the extra
> 'address' argument that mremap passes to pte_alloc may do something
> subtle architecture related in the future, that makes the scheme not
> work.  Also we find that there is no point in passing the 'address' to
> pte_alloc since its unused.
>
> This patch therefore removes this argument tree-wide resulting in a nice
> negative diff as well. Also ensuring along the way that the architecture
> does not do anything funky with 'address' argument that goes unnoticed.
>
> Build and boot tested on x86-64. Build tested on arm64.
>
> The changes were obtained by applying the following Coccinelle script.
> The pte_fragment_alloc was manually fixed up since it was only 2
> occurences and could not be easily generalized (and thanks Julia for
> answering all my silly and not-silly Coccinelle questions!).
>
> // Options: --include-headers --no-includes
> // Note: I split the 'identifier fn' line, so if you are manually
> // running it, please unsplit it so it runs for you.
>
> virtual patch
>
> @pte_alloc_func_def depends on patch exists@
> identifier E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> type T2;
> @@
>
>   fn(...
> - , T2 E2
>   )
>   { ... }
>
> @pte_alloc_func_proto depends on patch exists@
> identifier E1, E2, E4;
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )
>
> @pte_alloc_func_call depends on patch exists@
> expression E2;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
>   fn(...
> -,  E2
>   )
>
> @pte_alloc_macro depends on patch exists@
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> identifier a, b, c;
> expression e;
> position p;
> @@
>
> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )
>
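
For reference, the script above is normally fed to spatch with the options
noted in its header, roughly like this (a sketch, assuming the rules are
saved as pte_alloc.cocci, a hypothetical file name):

  spatch --sp-file pte_alloc.cocci --include-headers --no-includes --dir . --in-place
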
> Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> Cc: elfring@users.sourceforge.net
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   arch/alpha/include/asm/pgalloc.h             |  6 +++---
>   arch/arc/include/asm/pgalloc.h               |  5 ++---
>   arch/arm/include/asm/pgalloc.h               |  4 ++--
>   arch/arm64/include/asm/pgalloc.h             |  4 ++--
>   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
>   arch/ia64/include/asm/pgalloc.h              |  5 ++---
>   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
>   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
>   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
>   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
>   arch/microblaze/mm/pgtable.c                 |  3 +--
>   arch/mips/include/asm/pgalloc.h              |  6 ++----
>   arch/nds32/include/asm/pgalloc.h             |  5 ++---
>   arch/nios2/include/asm/pgalloc.h             |  6 ++----
>   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
>   arch/openrisc/mm/ioremap.c                   |  3 +--
>   arch/parisc/include/asm/pgalloc.h            |  4 ++--
>   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
>   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
>   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
>   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
>   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
>   arch/riscv/include/asm/pgalloc.h             |  6 ++----
>   arch/s390/include/asm/pgalloc.h              |  4 ++--
>   arch/sh/include/asm/pgalloc.h                |  6 ++----
>   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
>   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
>   arch/sparc/mm/init_64.c                      |  6 ++----
>   arch/sparc/mm/srmmu.c                        |  4 ++--
>   arch/um/kernel/mem.c                         |  4 ++--

There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
that still takes the unused address argument.

This patch missed updating it.

>   arch/unicore32/include/asm/pgalloc.h         |  4 ++--
>   arch/x86/include/asm/pgalloc.h               |  4 ++--
>   arch/x86/mm/pgtable.c                        |  4 ++--
>   arch/xtensa/include/asm/pgalloc.h            |  8 +++-----
>   include/linux/mm.h                           | 13 ++++++-------
>   mm/huge_memory.c                             |  8 ++++----
>   mm/kasan/kasan_init.c                        |  2 +-
>   mm/memory.c                                  | 17 ++++++++---------
>   mm/migrate.c                                 |  2 +-
>   mm/mremap.c                                  |  2 +-
>   mm/userfaultfd.c                             |  2 +-
>   virt/kvm/arm/mmu.c                           |  2 +-
>   43 files changed, 95 insertions(+), 145 deletions(-)
>
> diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
> index ab3e3a8638fb..02f9f91bb4f0 100644
> --- a/arch/alpha/include/asm/pgalloc.h
> +++ b/arch/alpha/include/asm/pgalloc.h
> @@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> @@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
> -	pte_t *pte = pte_alloc_one_kernel(mm, address);
> +	pte_t *pte = pte_alloc_one_kernel(mm);
>   	struct page *page;
>   
>   	if (!pte)
> diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
> index 3749234b7419..9c9b5a5ebf2e 100644
> --- a/arch/arc/include/asm/pgalloc.h
> +++ b/arch/arc/include/asm/pgalloc.h
> @@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
>   	return get_order(PTRS_PER_PTE * sizeof(pte_t));
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte_pg;
>   	struct page *page;
> diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
> index 2d7344f0e208..17ab72f0cc4e 100644
> --- a/arch/arm/include/asm/pgalloc.h
> +++ b/arch/arm/include/asm/pgalloc.h
> @@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
>    *  +------------+
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..52fa47c73bf0 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP);
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
> index eeebf862c46c..d36183887b60 100644
> --- a/arch/hexagon/include/asm/pgalloc.h
> +++ b/arch/hexagon/include/asm/pgalloc.h
> @@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> @@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   }
>   
>   /* _kernel variant gets to use a different allocator */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
>   	return (pte_t *) __get_free_page(flags);
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index 3ee5362f2661..c9e481023c25 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
>   	pmd_val(*pmd_entry) = __pa(pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> @@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
>   	return page;
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(0, GFP_KERNEL, NULL);
>   }
> diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> index 12fe700632f4..4399d712f6db 100644
> --- a/arch/m68k/include/asm/mcf_pgalloc.h
> +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   
>   extern const char bad_pmd_string[];
>   
> -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_DMA);
>   
> @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
>   #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
>   
> -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> -
>   #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
>   	(unsigned long)(page_address(page)))
>   
> @@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
>   
>   #define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_pages(GFP_DMA, 0);
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
> index 7859a86319cf..d04d9ba9b976 100644
> --- a/arch/m68k/include/asm/motorola_pgalloc.h
> +++ b/arch/m68k/include/asm/motorola_pgalloc.h
> @@ -8,7 +8,7 @@
>   extern pmd_t *get_pointer_table(void);
>   extern int free_pointer_table(pmd_t *);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   	free_page((unsigned long) pte);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
> index 11485d38de4e..1456c5eecbd9 100644
> --- a/arch/m68k/include/asm/sun3_pgalloc.h
> +++ b/arch/m68k/include/asm/sun3_pgalloc.h
> @@ -35,8 +35,7 @@ do {							\
>   	tlb_remove_page((tlb), pte);			\
>   } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	unsigned long page = __get_free_page(GFP_KERNEL);
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return (pte_t *) (page);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>           struct page *page = alloc_pages(GFP_KERNEL, 0);
>   
> diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
> index 7c89390c0c13..f4cc9ffc449e 100644
> --- a/arch/microblaze/include/asm/pgalloc.h
> +++ b/arch/microblaze/include/asm/pgalloc.h
> @@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
>   #define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
>   #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -		unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> @@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
>   	return ptepage;
>   }
>   
> -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
> -		unsigned long address)
> -{
> -	unsigned long *ret;
> -
> -	ret = pte_quicklist;
> -	if (ret != NULL) {
> -		pte_quicklist = (unsigned long *)(*ret);
> -		ret[0] = 0;
> -		pgtable_cache_size--;
> -	}
> -	return (pte_t *)ret;
> -}
> -
>   static inline void pte_free_fast(pte_t *pte)
>   {
>   	*(unsigned long **)pte = pte_quicklist;
> diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
> index 7f525962cdfa..c2ce1e42b888 100644
> --- a/arch/microblaze/mm/pgtable.c
> +++ b/arch/microblaze/mm/pgtable.c
> @@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
>   	return pa;
>   }
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -		unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	if (mem_init_done) {
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 39b9f311c4ef..27808d9461f4 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
> index 27448869131a..3c5fee5b5759 100644
> --- a/arch/nds32/include/asm/pgalloc.h
> +++ b/arch/nds32/include/asm/pgalloc.h
> @@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
>   
>   #define check_pgt_cache()		do { } while (0)
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long addr)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pgtable_t pte;
>   
> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
> index bb47d08c8ef7..3a149ead1207 100644
> --- a/arch/nios2/include/asm/pgalloc.h
> +++ b/arch/nios2/include/asm/pgalloc.h
> @@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_pages((unsigned long)pgd, PGD_ORDER);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
> index 8999b9226512..149c82ee4b8b 100644
> --- a/arch/openrisc/include/asm/pgalloc.h
> +++ b/arch/openrisc/include/asm/pgalloc.h
> @@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   	pte = alloc_pages(GFP_KERNEL, 0);
> diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
> index 2175e4bfd9fc..24fb1021c75a 100644
> --- a/arch/openrisc/mm/ioremap.c
> +++ b/arch/openrisc/mm/ioremap.c
> @@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
>    * the memblock infrastructure.
>    */
>   
> -pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
> index cf13275f7c6d..d05c678c77c4 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>   	if (!page)
> @@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
>   }
>   
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
>   	return pte;
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 82e44b1a00ae..af9e13555d95 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 391ed2c3b697..8f1d92e99fe5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
>   			pgtable_cache[(shift) - 1];	\
>   		})
>   
> -extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
> +extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
>   extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
>   extern void pte_fragment_free(unsigned long *, int);
>   extern void pmd_fragment_free(unsigned long *);
> @@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
>   	return (pgtable_t)pmd_page_vaddr(pmd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
> -	return (pte_t *)pte_fragment_alloc(mm, address, 1);
> +	return (pte_t *)pte_fragment_alloc(mm, 1);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
> -	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
> +	return (pgtable_t)pte_fragment_alloc(mm, 0);
>   }
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index 8825953c225b..16623f53f0d4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   #define pmd_pgtable(pmd) pmd_page(pmd)
>   #endif
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
> -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +extern pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> index e2d62d033708..2e7e0230edf4 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> @@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   }
>   
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -				      unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	pte_t *pte;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..cff1d426ca6a 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -379,7 +379,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
>   	return (pte_t *)ret;
>   }
>   
> -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
>   {
>   	pte_t *pte;
>   
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 120a49bfb9c6..b99a89cdcc5e 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
>   
>   extern char etext[], _stext[], _sinittext[], _einittext[];
>   
> -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *ptepage;
>   
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index a79ed5faff3a..94043cf83c90 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -	unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(
>   		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>   }
>   
> -static inline struct page *pte_alloc_one(struct mm_struct *mm,
> -	unsigned long address)
> +static inline struct page *pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
> index f0f9bcf94c03..ce2ca8cbd2ec 100644
> --- a/arch/s390/include/asm/pgalloc.h
> +++ b/arch/s390/include/asm/pgalloc.h
> @@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
>   /*
>    * page table entry allocation/free routines.
>    */
> -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
> +#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
> +#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
>   
>   #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
>   #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
> index ed053a359ab7..8ad73cb31121 100644
> --- a/arch/sh/include/asm/pgalloc.h
> +++ b/arch/sh/include/asm/pgalloc.h
> @@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
>   /*
>    * Allocate and free page tables.
>    */
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page;
>   	void *pg;
> diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
> index 90459481c6c7..282be50a4adf 100644
> --- a/arch/sparc/include/asm/pgalloc_32.h
> +++ b/arch/sparc/include/asm/pgalloc_32.h
> @@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
>   void pmd_set(pmd_t *pmdp, pte_t *ptep);
>   #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					  unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
>   }
> diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
> index 874632f34f62..48abccba4991 100644
> --- a/arch/sparc/include/asm/pgalloc_64.h
> +++ b/arch/sparc/include/asm/pgalloc_64.h
> @@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>   	kmem_cache_free(pgtable_cache, pmd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address);
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address);
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
> +pgtable_t pte_alloc_one(struct mm_struct *mm);
>   void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
>   void pte_free(struct mm_struct *mm, pgtable_t ptepage);
>   
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f396048a0d68..6133f21811e9 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2921,8 +2921,7 @@ void __flush_tlb_all(void)
>   			     : : "r" (pstate));
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -			    unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	pte_t *pte = NULL;
> @@ -2933,8 +2932,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm,
> -			unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>   	if (!page)
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index be9cb0065179..ce67a96e70c3 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
>    * Alignments up to the page size are the same for physical and virtual
>    * addresses of the nocache area.
>    */
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	unsigned long pte;
>   	struct page *page;
>   
> -	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
> +	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
>   		return NULL;
>   	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
>   	if (!pgtable_page_ctor(page)) {
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 3c0e470ea646..1f277191fbf3 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -197,7 +197,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long) pgd);
>   }
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -205,7 +205,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>   	return pte;
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
> index f0fdb268f8f2..7cceabecf4e3 100644
> --- a/arch/unicore32/include/asm/pgalloc.h
> +++ b/arch/unicore32/include/asm/pgalloc.h
> @@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
>    * Allocate one PTE table.
>    */
>   static inline pte_t *
> -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   
> @@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
>   }
>   
>   static inline pgtable_t
> -pte_alloc_one(struct mm_struct *mm, unsigned long addr)
> +pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
> index fbd578daa66e..5068e85165b2 100644
> --- a/arch/x86/include/asm/pgalloc.h
> +++ b/arch/x86/include/asm/pgalloc.h
> @@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
>   extern pgd_t *pgd_alloc(struct mm_struct *);
>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>   
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);
>   
>   /* Should really implement gc for free page table pages. This could be
>      done with a reference count in struct page. */
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 089e78c4effd..a2eff247377b 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
>   
>   gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
>   
> -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
>   }
>   
> -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	struct page *pte;
>   
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index 1065bc8bcae5..b3b388ff2f01 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>   	free_page((unsigned long)pgd);
>   }
>   
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> -					 unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   {
>   	pte_t *ptep;
>   	int i;
> @@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
>   	return ptep;
>   }
>   
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> -					unsigned long addr)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
>   {
>   	pte_t *pte;
>   	struct page *page;
>   
> -	pte = pte_alloc_one_kernel(mm, addr);
> +	pte = pte_alloc_one_kernel(mm);
>   	if (!pte)
>   		return NULL;
>   	page = virt_to_page(pte);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0416a7204be3..89c2b1739a69 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1789,8 +1789,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
>   static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
>   #endif
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
> +int __pte_alloc_kernel(pmd_t *pmd);
>   
>   /*
>    * The following ifdef needed to get the 4level-fixup.h header to work.
> @@ -1928,18 +1928,17 @@ static inline void pgtable_page_dtor(struct page *page)
>   	pte_unmap(pte);					\
>   } while (0)
>   
> -#define pte_alloc(mm, pmd, address)			\
> -	(unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
> +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
>   
>   #define pte_alloc_map(mm, pmd, address)			\
> -	(pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
> +	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
>   
>   #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	(pte_alloc(mm, pmd, address) ?			\
> +	(pte_alloc(mm, pmd) ?			\
>   		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
>   
>   #define pte_alloc_kernel(pmd, address)			\
> -	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
> +	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   		NULL: pte_offset_kernel(pmd, address))
>   
>   #if USE_SPLIT_PMD_PTLOCKS
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 00704060b7f7..fd7e8714e5a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>   		return VM_FAULT_FALLBACK;
>   	}
>   
> -	pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +	pgtable = pte_alloc_one(vma->vm_mm);
>   	if (unlikely(!pgtable)) {
>   		ret = VM_FAULT_OOM;
>   		goto release;
> @@ -683,7 +683,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>   		struct page *zero_page;
>   		bool set;
>   		vm_fault_t ret;
> -		pgtable = pte_alloc_one(vma->vm_mm, haddr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (unlikely(!pgtable))
>   			return VM_FAULT_OOM;
>   		zero_page = mm_get_huge_zero_page(vma->vm_mm);
> @@ -772,7 +772,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>   		return VM_FAULT_SIGBUS;
>   
>   	if (arch_needs_pgtable_deposit()) {
> -		pgtable = pte_alloc_one(vma->vm_mm, addr);
> +		pgtable = pte_alloc_one(vma->vm_mm);
>   		if (!pgtable)
>   			return VM_FAULT_OOM;
>   	}
> @@ -910,7 +910,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>   	if (!vma_is_anonymous(vma))
>   		return 0;
>   
> -	pgtable = pte_alloc_one(dst_mm, addr);
> +	pgtable = pte_alloc_one(dst_mm);
>   	if (unlikely(!pgtable))
>   		goto out;
>   
> diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
> index 7a2a2f13f86f..272849cd2007 100644
> --- a/mm/kasan/kasan_init.c
> +++ b/mm/kasan/kasan_init.c
> @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
>   			pte_t *p;
>   
>   			if (slab_is_available())
> -				p = pte_alloc_one_kernel(&init_mm, addr);
> +				p = pte_alloc_one_kernel(&init_mm);
>   			else
>   				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>   			if (!p)
> diff --git a/mm/memory.c b/mm/memory.c
> index c467102a5cbc..3afdcf38993d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -647,10 +647,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   	}
>   }
>   
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
>   {
>   	spinlock_t *ptl;
> -	pgtable_t new = pte_alloc_one(mm, address);
> +	pgtable_t new = pte_alloc_one(mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -681,9 +681,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>   	return 0;
>   }
>   
> -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
> +int __pte_alloc_kernel(pmd_t *pmd)
>   {
> -	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
> +	pte_t *new = pte_alloc_one_kernel(&init_mm);
>   	if (!new)
>   		return -ENOMEM;
>   
> @@ -3139,7 +3139,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
> +	if (pte_alloc(vma->vm_mm, vmf->pmd))
>   		return VM_FAULT_OOM;
>   
>   	/* See the comment in pte_alloc_one_map() */
> @@ -3286,7 +3286,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
>   		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>   		spin_unlock(vmf->ptl);
>   		vmf->prealloc_pte = NULL;
> -	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
> +	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
>   		return VM_FAULT_OOM;
>   	}
>   map_pte:
> @@ -3365,7 +3365,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>   	 * related to pte entry. Use the preallocated table for that.
>   	 */
>   	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
> -		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			return VM_FAULT_OOM;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> @@ -3603,8 +3603,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
>   			start_pgoff + nr_pages - 1);
>   
>   	if (pmd_none(*vmf->pmd)) {
> -		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
> -						  vmf->address);
> +		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
>   		if (!vmf->prealloc_pte)
>   			goto out;
>   		smp_wmb(); /* See comment in __pte_alloc() */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 84381b55b2bd..3080b0626026 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2605,7 +2605,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>   	 *
>   	 * Here we only have down_read(mmap_sem).
>   	 */
> -	if (pte_alloc(mm, pmdp, addr))
> +	if (pte_alloc(mm, pmdp))
>   		goto abort;
>   
>   	/* See the comment in pte_alloc_one_map() */
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 5c2e18505f75..9e68a02a52b1 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -240,7 +240,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
>   		}
> -		if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
> +		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;
>   		if (extent > next - new_addr)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5029f241908f..f05c8bc38ca5 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -513,7 +513,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
>   			break;
>   		}
>   		if (unlikely(pmd_none(dst_pmdval)) &&
> -		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
> +		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
>   			err = -ENOMEM;
>   			break;
>   		}
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index ed162a6c57c5..3f8180414301 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
>   		BUG_ON(pmd_sect(*pmd));
>   
>   		if (pmd_none(*pmd)) {
> -			pte = pte_alloc_one_kernel(NULL, addr);
> +			pte = pte_alloc_one_kernel(NULL);
>   			if (!pte) {
>   				kvm_err("Cannot allocate Hyp pte\n");
>   				return -ENOMEM;

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12  1:37   ` Joel Fernandes (Google)
                       ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 14:09     ` Anton Ivanov
  -1 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP systems by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> In case THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared to the improvement, on both x86-64 and arm64.
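
A minimal sketch of how this kind of 1GB mremap timing can be taken from
userspace (an illustration under the assumption of a pre-faulted anonymous
mapping, not necessarily the exact harness behind the numbers above):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/mman.h>

#define SIZE	(1UL << 30)	/* 1GB */

int main(void)
{
	struct timespec start, end;
	void *src, *dst, *res;

	src = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	/* Reserve a destination so MREMAP_FIXED forces an actual move. */
	dst = mmap(NULL, SIZE, PROT_NONE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED || dst == MAP_FAILED)
		return 1;
	memset(src, 1, SIZE);	/* fault everything in so page tables exist */

	clock_gettime(CLOCK_MONOTONIC, &start);
	res = mremap(src, SIZE, SIZE, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	clock_gettime(CLOCK_MONOTONIC, &end);
	if (res == MAP_FAILED)
		return 1;

	printf("Total mremap time for 1GB data: %ld nanoseconds.\n",
	       (end.tv_sec - start.tv_sec) * 1000000000L +
	       (end.tv_nsec - start.tv_nsec));
	return 0;
}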
>
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all.

If I read the code right, MIPS completely ignores the address argument,
so set_pmd_at there may not have the effect this patch is trying to
achieve.

IMHO, this needs to be done per-architecture, not across the full tree.
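
As a point of reference, on a number of architectures set_pmd_at() is
little more than a plain store that never looks at the address at all --
a sketch of such a definition (illustrative only, not the actual MIPS
code) would be:

static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd)
{
	/* 'mm' and 'addr' are accepted but unused; just store the entry. */
	*pmdp = pmd;
}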

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be a per-architecture, not across full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, linux-riscv, elfring, Jonas Bonn, linux-s390,
	dancol, Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	Max Filippov, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be a per-architecture, not across full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: linux-riscv

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan at kernel.org
> Cc: pantin at google.com
> Cc: hughd at google.com
> Cc: lokeshgidra at google.com
> Cc: dancol at google.com
> Cc: mhocko at kernel.org
> Cc: kirill at shutemov.name
> Cc: akpm at linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be a per-architecture, not across full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, linux-parisc, pantin, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be a per-architecture, not across full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, linux-parisc, pantin, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be a per-architecture, not across full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: linux-snps-arc

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan at kernel.org
> Cc: pantin at google.com
> Cc: hughd at google.com
> Cc: lokeshgidra at google.com
> Cc: dancol at google.com
> Cc: mhocko at kernel.org
> Cc: kirill at shutemov.name
> Cc: akpm at linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be a per-architecture, not across full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:09     ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:09 UTC (permalink / raw)
  To: openrisc

On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP system by copying at the PMD level when possible.
>
> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> completion times drops from 160-250 millesconds to 380-400 microseconds.
>
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
>
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
>
> Incase THP is enabled, the optimization is skipped. I also flush the
> tlb every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is not much compared the improvement, on both x86-64 and arm64.
>
> Cc: minchan at kernel.org
> Cc: pantin at google.com
> Cc: hughd at google.com
> Cc: lokeshgidra at google.com
> Cc: dancol at google.com
> Cc: mhocko at kernel.org
> Cc: kirill at shutemov.name
> Cc: akpm at linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 62 insertions(+)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>   		drop_rmap_locks(vma);
>   }
>   
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have released it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);
> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);

UML does not have set_pmd_at at all

If I read the code right, MIPS completely ignores the address argument 
so set_pmd_at there may not have the effect which this patch is trying 
to achieve.

IMHO, this needs to be done per-architecture, not across the full tree.

> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>   unsigned long move_page_tables(struct vm_area_struct *vma,
>   		unsigned long old_addr, struct vm_area_struct *new_vma,
>   		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>   			split_huge_pmd(vma, old_pmd, old_addr);
>   			if (pmd_trans_unstable(old_pmd))
>   				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>   		}
> +
>   		if (pte_alloc(new_vma->vm_mm, new_pmd))
>   			break;
>   		next = (new_addr + PMD_SIZE) & PMD_MASK;


Brgds,


A.
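
The timing figures quoted in the changelog above come from a userspace test
that remaps a fully populated 1GB region. A minimal sketch of such a
benchmark (an illustration only, not the program that produced the numbers
above; the buffer size, page touching and MREMAP_FIXED destination are
assumptions) could look like this:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <string.h>
	#include <time.h>
	#include <sys/mman.h>

	#define SIZE (1UL << 30)	/* 1GB */

	int main(void)
	{
		struct timespec t0, t1;
		void *src, *dst, *target;
		long ns;

		src = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		target = mmap(NULL, SIZE, PROT_NONE,
			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (src == MAP_FAILED || target == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* Fault in every page so there are real page tables to move. */
		memset(src, 1, SIZE);

		clock_gettime(CLOCK_MONOTONIC, &t0);
		/* MREMAP_FIXED forces an actual move through move_page_tables(). */
		dst = mremap(src, SIZE, SIZE, MREMAP_MAYMOVE | MREMAP_FIXED, target);
		clock_gettime(CLOCK_MONOTONIC, &t1);
		if (dst == MAP_FAILED) {
			perror("mremap");
			return 1;
		}

		ns = (t1.tv_sec - t0.tv_sec) * 1000000000L +
		     (t1.tv_nsec - t0.tv_nsec);
		printf("Total mremap time for 1GB data: %ld nanoseconds.\n", ns);
		return 0;
	}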


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 14:09     ` Anton Ivanov
                         ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  -1 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov
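
The remark that the definition may not come from the arch code points at a
generic fallback. No such fallback exists at this point in the thread (every
definition is per-arch), so the following is purely a hypothetical sketch of
how one could be provided, using the usual asm-generic override pattern:

	/* Hypothetical generic fallback, e.g. in include/asm-generic/pgtable.h.
	 * An architecture providing its own implementation would also do
	 * '#define set_pmd_at set_pmd_at' in its headers to suppress this one.
	 * Illustration only; not existing 4.19 kernel code.
	 */
	#ifndef set_pmd_at
	static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
				      pmd_t *pmdp, pmd_t pmd)
	{
		*pmdp = pmd;	/* the address is deliberately unused */
	}
	#define set_pmd_at set_pmd_at
	#endif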

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan at kernel.org
> > Cc: pantin at google.com
> > Cc: hughd at google.com
> > Cc: lokeshgidra at google.com
> > Cc: dancol at google.com
> > Cc: mhocko at kernel.org
> > Cc: kirill at shutemov.name
> > Cc: akpm at linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan at kernel.org
> > Cc: pantin at google.com
> > Cc: hughd at google.com
> > Cc: lokeshgidra at google.com
> > Cc: dancol at google.com
> > Cc: mhocko at kernel.org
> > Cc: kirill at shutemov.name
> > Cc: akpm at linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:37       ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 14:37 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP system by copying at the PMD level when possible.
> > 
> > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > tlb every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan at kernel.org
> > Cc: pantin at google.com
> > Cc: hughd at google.com
> > Cc: lokeshgidra at google.com
> > Cc: dancol at google.com
> > Cc: mhocko at kernel.org
> > Cc: kirill at shutemov.name
> > Cc: akpm at linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >   		drop_rmap_locks(vma);
> >   }
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have released it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> 
> UML does not have set_pmd_at at all

Every architecture does. :)

But it may not come from the arch code.

> If I read the code right, MIPS completely ignores the address argument so
> set_pmd_at there may not have the effect which this patch is trying to
> achieve.

Ignoring the address is fine. Most architectures do that.
The idea is to move the page table to the new pmd slot. It has nothing to do
with the address passed to set_pmd_at().

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 14:37       ` Kirill A. Shutemov
                           ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 14:48         ` Anton Ivanov
  -1 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP system by copying at the PMD level when possible.
>>>
>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> tlb every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>> doing so is not much compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan@kernel.org
>>> Cc: pantin@google.com
>>> Cc: hughd@google.com
>>> Cc: lokeshgidra@google.com
>>> Cc: dancol@google.com
>>> Cc: mhocko@kernel.org
>>> Cc: kirill@shutemov.name
>>> Cc: akpm@linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have released it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it patching vs 4.19-rc before I made this statement and 
ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may not come from the arch code.

There is no generic definition as far as I can see. All 12 defines in 
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code 
out of the MIPS tree for further UML testing. It does exactly that - 
just moves the pmd to the new slot.

>
A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP system by copying at the PMD level when possible.
>>>
>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> tlb every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>> doing so is not much compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan@kernel.org
>>> Cc: pantin@google.com
>>> Cc: hughd@google.com
>>> Cc: lokeshgidra@google.com
>>> Cc: dancol@google.com
>>> Cc: mhocko@kernel.org
>>> Cc: kirill@shutemov.name
>>> Cc: akpm@linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have released it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it patching vs 4.19-rc before I made this statement and 
ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may not come from the arch code.

There is no generic definition as far as I can see. All 12 defines in 
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code 
out of the MIPS tree for further UML testing. It does exactly that - 
just moves the pmd to the new slot.

>
A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP system by copying at the PMD level when possible.
>>>
>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> tlb every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>> doing so is not much compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan@kernel.org
>>> Cc: pantin@google.com
>>> Cc: hughd@google.com
>>> Cc: lokeshgidra@google.com
>>> Cc: dancol@google.com
>>> Cc: mhocko@kernel.org
>>> Cc: kirill@shutemov.name
>>> Cc: akpm@linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have released it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it patching vs 4.19-rc before I made this statement and 
ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may not come from the arch code.

There is no generic definition as far as I can see. All 12 defines in 
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code 
out of the MIPS tree for further UML testing. It does exactly that - 
just moves the pmd to the new slot.

>
A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: linux-riscv

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP system by copying at the PMD level when possible.
>>>
>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> tlb every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>> doing so is not much compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan at kernel.org
>>> Cc: pantin at google.com
>>> Cc: hughd at google.com
>>> Cc: lokeshgidra at google.com
>>> Cc: dancol at google.com
>>> Cc: mhocko at kernel.org
>>> Cc: kirill at shutemov.name
>>> Cc: akpm at linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have release it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it, patched against 4.19-rc, before I made this statement
and ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may come not from the arch code.

There is no generic definition as far as I can see. All 12 definitions in
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code
out of the MIPS tree for further UML testing. It does exactly that -
just moves the pmd to the new slot.

>
A.

^ permalink raw reply	[flat|nested] 317+ messages in thread
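Note: the "Total mremap time for 1GB data" figures quoted in the patch
description can be reproduced with a small userspace benchmark along the
lines below. This is a sketch under stated assumptions - a 1 GiB private
anonymous mapping, fully populated, then forced to move once with
MREMAP_MAYMOVE | MREMAP_FIXED - and not the actual test program behind the
numbers above.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>

#define SZ (1UL << 30)	/* 1 GiB */

int main(void)
{
	struct timespec t0, t1;
	void *old, *dst, *new;

	old = mmap(NULL, SZ, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	/* reserve a destination range so the mapping really has to move */
	dst = mmap(NULL, SZ, PROT_NONE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (old == MAP_FAILED || dst == MAP_FAILED)
		return 1;
	memset(old, 1, SZ);	/* fault in the pages so page tables exist */

	clock_gettime(CLOCK_MONOTONIC, &t0);
	new = mremap(old, SZ, SZ, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	if (new == MAP_FAILED)
		return 1;

	printf("Total mremap time for 1GB data: %ld nanoseconds.\n",
	       (t1.tv_sec - t0.tv_sec) * 1000000000L +
	       (t1.tv_nsec - t0.tv_nsec));
	return 0;
}

Going from roughly 200 milliseconds to roughly 0.4 milliseconds is a factor
of about 500, which is where the "500x" in the subject line comes from.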

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP systems by copying at the PMD level when possible.
>>>
>>> The speedup is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> TLB every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. The cost of doing so is small
>>> compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan@kernel.org
>>> Cc: pantin@google.com
>>> Cc: hughd@google.com
>>> Cc: lokeshgidra@google.com
>>> Cc: dancol@google.com
>>> Cc: mhocko@kernel.org
>>> Cc: kirill@shutemov.name
>>> Cc: akpm@linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have release it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it, patched against 4.19-rc, before I made this statement
and ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may come not from the arch code.

There is no generic definition as far as I can see. All 12 definitions in
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code
out of the MIPS tree for further UML testing. It does exactly that -
just moves the pmd to the new slot.

>
A.



^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP systems by copying at the PMD level when possible.
>>>
>>> The speedup is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> TLB every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. The cost of doing so is small
>>> compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan@kernel.org
>>> Cc: pantin@google.com
>>> Cc: hughd@google.com
>>> Cc: lokeshgidra@google.com
>>> Cc: dancol@google.com
>>> Cc: mhocko@kernel.org
>>> Cc: kirill@shutemov.name
>>> Cc: akpm@linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have release it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it, patched against 4.19-rc, before I made this statement
and ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may come not from the arch code.

There is no generic definition as far as I can see. All 12 definitions in
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code
out of the MIPS tree for further UML testing. It does exactly that -
just moves the pmd to the new slot.

>
A.


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: linux-snps-arc

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP systems by copying at the PMD level when possible.
>>>
>>> The speedup is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> TLB every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. The cost of doing so is small
>>> compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan at kernel.org
>>> Cc: pantin at google.com
>>> Cc: hughd at google.com
>>> Cc: lokeshgidra at google.com
>>> Cc: dancol at google.com
>>> Cc: mhocko at kernel.org
>>> Cc: kirill at shutemov.name
>>> Cc: akpm at linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have release it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it, patched against 4.19-rc, before I made this statement
and ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may come not from the arch code.

There is no generic definition as far as I can see. All 12 definitions in
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code
out of the MIPS tree for further UML testing. It does exactly that -
just moves the pmd to the new slot.

>
A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 14:48         ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 14:48 UTC (permalink / raw)
  To: openrisc

On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>> Android needs to mremap large regions of memory during memory management
>>> related operations. The mremap system call can be really slow if THP is
>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>> pte at a time, and can be really slow across a large map. Turning on THP
>>> may not be a viable option, and is not for us. This patch speeds up the
>>> performance for non-THP systems by copying at the PMD level when possible.
>>>
>>> The speedup is three orders of magnitude. On a 1GB mremap, the mremap
>>> completion time drops from 160-250 milliseconds to 380-400 microseconds.
>>>
>>> Before:
>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>
>>> After:
>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>
>>> In case THP is enabled, the optimization is skipped. I also flush the
>>> TLB every time we do this optimization since I couldn't find a way to
>>> determine if the low-level PTEs are dirty. The cost of doing so is small
>>> compared to the improvement, on both x86-64 and arm64.
>>>
>>> Cc: minchan at kernel.org
>>> Cc: pantin at google.com
>>> Cc: hughd at google.com
>>> Cc: lokeshgidra at google.com
>>> Cc: dancol at google.com
>>> Cc: mhocko at kernel.org
>>> Cc: kirill at shutemov.name
>>> Cc: akpm at linux-foundation.org
>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>> ---
>>>    mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 62 insertions(+)
>>>
>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>> index 9e68a02a52b1..d82c485822ef 100644
>>> --- a/mm/mremap.c
>>> +++ b/mm/mremap.c
>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>>>    		drop_rmap_locks(vma);
>>>    }
>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>>> +		  unsigned long new_addr, unsigned long old_end,
>>> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>> +{
>>> +	spinlock_t *old_ptl, *new_ptl;
>>> +	struct mm_struct *mm = vma->vm_mm;
>>> +
>>> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>> +	    || old_end - old_addr < PMD_SIZE)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The destination pmd shouldn't be established, free_pgtables()
>>> +	 * should have release it.
>>> +	 */
>>> +	if (WARN_ON(!pmd_none(*new_pmd)))
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * We don't have to worry about the ordering of src and dst
>>> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
>>> +	 */
>>> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>> +	if (old_ptl) {
>>> +		pmd_t pmd;
>>> +
>>> +		new_ptl = pmd_lockptr(mm, new_pmd);
>>> +		if (new_ptl != old_ptl)
>>> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>> +
>>> +		/* Clear the pmd */
>>> +		pmd = *old_pmd;
>>> +		pmd_clear(old_pmd);
>>> +
>>> +		VM_BUG_ON(!pmd_none(*new_pmd));
>>> +
>>> +		/* Set the new pmd */
>>> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
>> UML does not have set_pmd_at at all
> Every architecture does. :)

I tried to build it, patched against 4.19-rc, before I made this statement
and ran into that.

Presently it does not.

https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not 
on the list.

>
> But it may come not from the arch code.

There is no generic definition as far as I can see. All 12 definitions in
4.19 are in arch-specific code. Unless I am missing something...

>
>> If I read the code right, MIPS completely ignores the address argument so
>> set_pmd_at there may not have the effect which this patch is trying to
>> achieve.
> Ignoring the address is fine. Most architectures do that.
> The idea is to move the page table to the new pmd slot. It has nothing to do
> with the address passed to set_pmd_at().

If that is its only function, then I am going to appropriate the code
out of the MIPS tree for further UML testing. It does exactly that -
just moves the pmd to the new slot.

>
A.


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12 13:56   ` Anton Ivanov
                       ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 16:34     ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. Obviously I couldn't test every arch. The reason this was
missed is that the script could not match prototypes with unnamed
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread
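Note: the case discussed above is a prototype with unnamed parameters, which
the posted Coccinelle rules do not match. For illustration only (this hunk is
not part of the posted patch), the declaration in arch/um/include/asm/pgalloc.h
would presumably end up with the unused address argument dropped, the same way
the named-parameter prototypes were converted:

-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *);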

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. Obviously I couldn't test every arch. The reason this was
missed is that the script could not match prototypes with unnamed
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, sparclinux,
	linux-xtensa, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	Max Filippov, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. Obviously I couldn't test every arch. The reason this was
missed is that the script could not match prototypes with unnamed
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring at users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. Obviously I couldn't test every arch. The reason this was
missed is that the script could not match prototypes with unnamed
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, linux-parisc, pantin, linux-kernel, Fenghua Yu,
	minchan, Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. Obviously I couldn't test every arch. The reason this was
missed is that the script could not match prototypes with unnamed
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, Max Filippov,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, linux-parisc, pantin, linux-kernel, Fenghua Yu,
	minchan, Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. I couldn't test every arch, obviously. The reason this was
missed is that the script could not find matches for prototypes without named
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. I couldn't test every arch, obviously. The reason this was
missed is that the script could not find matches for prototypes without named
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:34     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:34 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 02:56:19PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > This series speeds up mremap(2) syscall by copying page tables at the
> > PMD level even for non-THP systems. There is concern that the extra
> > 'address' argument that mremap passes to pte_alloc may do something
> > subtle architecture related in the future, that makes the scheme not
> > work.  Also we find that there is no point in passing the 'address' to
> > pte_alloc since its unused.
> > 
> > This patch therefore removes this argument tree-wide resulting in a nice
> > negative diff as well. Also ensuring along the way that the architecture
> > does not do anything funky with 'address' argument that goes unnoticed.
> > 
> > Build and boot tested on x86-64. Build tested on arm64.
> > 
> > The changes were obtained by applying the following Coccinelle script.
> > The pte_fragment_alloc was manually fixed up since it was only 2
> > occurences and could not be easily generalized (and thanks Julia for
> > answering all my silly and not-silly Coccinelle questions!).
> > 
> > // Options: --include-headers --no-includes
> > // Note: I split the 'identifier fn' line, so if you are manually
> > // running it, please unsplit it so it runs for you.
> > 
> > virtual patch
> > 
> > @pte_alloc_func_def depends on patch exists@
> > identifier E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > type T2;
> > @@
> > 
> >   fn(...
> > - , T2 E2
> >   )
> >   { ... }
> > 
> > @pte_alloc_func_proto depends on patch exists@
> > identifier E1, E2, E4;
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> > 
> > @pte_alloc_func_call depends on patch exists@
> > expression E2;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> > 
> >   fn(...
> > -,  E2
> >   )
> > 
> > @pte_alloc_macro depends on patch exists@
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > identifier a, b, c;
> > expression e;
> > position p;
> > @@
> > 
> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> > 
> > Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Julia Lawall <Julia.Lawall@lip6.fr>
> > Cc: elfring@users.sourceforge.net
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >   arch/alpha/include/asm/pgalloc.h             |  6 +++---
> >   arch/arc/include/asm/pgalloc.h               |  5 ++---
> >   arch/arm/include/asm/pgalloc.h               |  4 ++--
> >   arch/arm64/include/asm/pgalloc.h             |  4 ++--
> >   arch/hexagon/include/asm/pgalloc.h           |  6 ++----
> >   arch/ia64/include/asm/pgalloc.h              |  5 ++---
> >   arch/m68k/include/asm/mcf_pgalloc.h          |  8 ++------
> >   arch/m68k/include/asm/motorola_pgalloc.h     |  4 ++--
> >   arch/m68k/include/asm/sun3_pgalloc.h         |  6 ++----
> >   arch/microblaze/include/asm/pgalloc.h        | 19 ++-----------------
> >   arch/microblaze/mm/pgtable.c                 |  3 +--
> >   arch/mips/include/asm/pgalloc.h              |  6 ++----
> >   arch/nds32/include/asm/pgalloc.h             |  5 ++---
> >   arch/nios2/include/asm/pgalloc.h             |  6 ++----
> >   arch/openrisc/include/asm/pgalloc.h          |  5 ++---
> >   arch/openrisc/mm/ioremap.c                   |  3 +--
> >   arch/parisc/include/asm/pgalloc.h            |  4 ++--
> >   arch/powerpc/include/asm/book3s/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 +++++-------
> >   arch/powerpc/include/asm/nohash/32/pgalloc.h |  4 ++--
> >   arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 ++----
> >   arch/powerpc/mm/pgtable-book3s64.c           |  2 +-
> >   arch/powerpc/mm/pgtable_32.c                 |  4 ++--
> >   arch/riscv/include/asm/pgalloc.h             |  6 ++----
> >   arch/s390/include/asm/pgalloc.h              |  4 ++--
> >   arch/sh/include/asm/pgalloc.h                |  6 ++----
> >   arch/sparc/include/asm/pgalloc_32.h          |  5 ++---
> >   arch/sparc/include/asm/pgalloc_64.h          |  6 ++----
> >   arch/sparc/mm/init_64.c                      |  6 ++----
> >   arch/sparc/mm/srmmu.c                        |  4 ++--
> >   arch/um/kernel/mem.c                         |  4 ++--
> 
> There is a declaration of pte_alloc_one in arch/um/include/asm/pgalloc.h
> 
> This patch missed it.

Ah, true. Thanks. I couldn't test every arch, obviously. The reason this was
missed is that the script could not find matches for prototypes without named
parameters:

extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

I wrote something like the rule below, but it failed to compile. Julia, any
suggestions on how to express this?

@pte_alloc_func_proto depends on patch exists@
type T1, T2, T3, T4;
identifier fn =~
"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
@@

(
- T3 fn(T1, T2);
+ T3 fn(T1);
|
- T3 fn(T1, T2, T4);
+ T3 fn(T1, T2);
)

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12 11:09   ` Kirill A. Shutemov
                       ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 16:37     ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-kernel, kernel-team, Michal Hocko, Julia Lawall, elfring,
	Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, dancol, Dave Hansen,
	David S. Miller, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, hughd, Ingo Molnar, James E.J. Bottomley,
	Jeff Dike, Jonas Bonn, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	linux-xtensa, pantin, lokeshgidra, Max Filippov, minchan,
	nios2-dev, openrisc, Peter Zijlstra, Richard Weinberger,
	Rich Felker, Sam Creasey, sparclinux, Stafford Horne,
	Stefan Kristiansson, Thomas Gleixner, Tony Luck, Will Deacon,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato, akpm

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov,
	linux-kernel, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Michal Hocko,
	linux-mm, lokeshgidra, sparclinux, linux-riscv, elfring,
	Jonas Bonn, linux-s390, dancol, Yoshinori Sato, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov,
	linux-kernel, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:37     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:37 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 02:09:06PM +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 11, 2018 at 06:37:55PM -0700, Joel Fernandes (Google) wrote:
> > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
> > index 12fe700632f4..4399d712f6db 100644
> > --- a/arch/m68k/include/asm/mcf_pgalloc.h
> > +++ b/arch/m68k/include/asm/mcf_pgalloc.h
> > @@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >  
> >  extern const char bad_pmd_string[];
> >  
> > -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > -	unsigned long address)
> > +extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >  {
> >  	unsigned long page = __get_free_page(GFP_DMA);
> >  
> > @@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
> >  #define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
> >  #define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
> >  
> > -#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
> > -
> 
> I believe this one was done manually, right?
> Please explicitly state everything you did on top of the semantic patch.

Ok, I can update the changelog with that information next time I send it.
This is the only thing I didn't mention in the changelog since it was a
trivial unused function deletion.. but I mentioned everything else..

And sir, you are one thorough reviewer! ;-)

 - Joel

[..]

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12 16:34     ` Joel Fernandes
                         ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 16:38       ` Julia Lawall
  -1 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Anton Ivanov, linux-kernel, linux-mips, Rich Felker, linux-ia64,
	linux-sh, Peter Zijlstra, Catalin Marinas, Dave Hansen,
	Will Deacon, Michal Hocko, linux-mm, lokeshgidra, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	sparclinux, linux-xtensa, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, kirill,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	Max Filippov, minchan, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: linux-riscv

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov, Ingo Molnar,
	kvmarm, Geert Uytterhoeven, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, openrisc, Borislav Petkov,
	Andy Lutomirski, Andrey Ryabinin, kirill, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov, Ingo Molnar,
	kvmarm, Geert Uytterhoeven, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, openrisc, Borislav Petkov,
	Andy Lutomirski, Andrey Ryabinin, kirill, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, nios2-dev, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: linux-snps-arc

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:38       ` Julia Lawall
  0 siblings, 0 replies; 317+ messages in thread
From: Julia Lawall @ 2018-10-12 16:38 UTC (permalink / raw)
  To: openrisc

> I wrote something like the rule below, but it failed to compile. Julia, any
> suggestions on how to express this?
>
> @pte_alloc_func_proto depends on patch exists@
> type T1, T2, T3, T4;
> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> @@
>
> (
> - T3 fn(T1, T2);
> + T3 fn(T1);
> |
> - T3 fn(T1, T2, T4);
> + T3 fn(T1, T2);
> )

What goes wrong?  It seems fine to me.

julia

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 14:48         ` Anton Ivanov
                             ` (8 preceding siblings ...)
  (?)
@ 2018-10-12 16:42           ` Anton Ivanov
  -1 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP system by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion times drop from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have release it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem and the omissions in the include changes for UML in
patch one have been fixed, it appears to be working.

What it needs is attached.
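
(As a rough sketch, ignoring the locking in the move_normal_pmd() hunk quoted
above, the PMD-level move boils down to the few lines below; that is why a
set_pmd_at() which simply stores the entry is enough here. The helper name is
made up purely for illustration, and it assumes the usual kernel mm headers.)

/*
 * Illustrative only: condensed from the quoted move_normal_pmd() hunk.
 * Detach the page-table page from the old pmd slot and install it at the
 * new one; most architectures ignore the address passed to set_pmd_at().
 */
static void sketch_move_pmd_entry(struct mm_struct *mm, unsigned long new_addr,
				  pmd_t *old_pmd, pmd_t *new_pmd)
{
	pmd_t pmd = *old_pmd;			/* entry pointing at the pte page */

	pmd_clear(old_pmd);			/* empty the old slot */
	set_pmd_at(mm, new_addr, new_pmd, pmd);	/* install it at the new slot */
}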


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring the address is fine. Most architectures do that.
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further uml testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.


[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch, Size: 1812 bytes --]

From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Fri, 12 Oct 2018 17:24:10 +0100
Subject: [PATCH] Incremental fixes to the mmremap patch

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/pgalloc.h | 4 ++--
 arch/um/include/asm/pgtable.h | 3 +++
 arch/um/kernel/tlb.c          | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 7485398d0737..1692da55e63a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,4 +359,7 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+
 #endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 763d35bdda01..d17b74184ba0 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -647,3 +647,9 @@ void force_flush_all(void)
 		vma = vma->vm_next;
 	}
 }
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
-- 
2.11.0


[-- Attachment #3: Type: text/plain, Size: 169 bytes --]

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP system by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion times drop from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have release it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem and the omissions in the include changes for UML in
patch one have been fixed, it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring the address is fine. Most architectures do that.
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further uml testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.


[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch, Size: 1811 bytes --]

From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Fri, 12 Oct 2018 17:24:10 +0100
Subject: [PATCH] Incremental fixes to the mmremap patch

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/pgalloc.h | 4 ++--
 arch/um/include/asm/pgtable.h | 3 +++
 arch/um/kernel/tlb.c          | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 7485398d0737..1692da55e63a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,4 +359,7 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+
 #endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 763d35bdda01..d17b74184ba0 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -647,3 +647,9 @@ void force_flush_all(void)
 		vma = vma->vm_next;
 	}
 }
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
-- 
2.11.0


[-- Attachment #3: Type: text/plain, Size: 169 bytes --]

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP system by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.




^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: linux-riscv


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan at kernel.org
>>>> Cc: pantin at google.com
>>>> Cc: hughd at google.com
>>>> Cc: lokeshgidra at google.com
>>>> Cc: dancol at google.com
>>>> Cc: mhocko at kernel.org
>>>> Cc: kirill at shutemov.name
>>>> Cc: akpm at linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Incremental-fixes-to-the-mmremap-patch.patch
Type: text/x-patch
Size: 1812 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-riscv/attachments/20181012/f5d1a300/attachment.bin>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.


[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch, Size: 1811 bytes --]

From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Fri, 12 Oct 2018 17:24:10 +0100
Subject: [PATCH] Incremental fixes to the mmremap patch

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/pgalloc.h | 4 ++--
 arch/um/include/asm/pgtable.h | 3 +++
 arch/um/kernel/tlb.c          | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 7485398d0737..1692da55e63a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,4 +359,7 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+
 #endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 763d35bdda01..d17b74184ba0 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -647,3 +647,9 @@ void force_flush_all(void)
 		vma = vma->vm_next;
 	}
 }
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
-- 
2.11.0


[-- Attachment #3: Type: text/plain, Size: 161 bytes --]

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch; name="0001-Incremental-fixes-to-the-mmremap-patch.patch", Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.


[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch, Size: 1811 bytes --]

From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Fri, 12 Oct 2018 17:24:10 +0100
Subject: [PATCH] Incremental fixes to the mmremap patch

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/pgalloc.h | 4 ++--
 arch/um/include/asm/pgtable.h | 3 +++
 arch/um/kernel/tlb.c          | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 7485398d0737..1692da55e63a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,4 +359,7 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+
 #endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 763d35bdda01..d17b74184ba0 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -647,3 +647,9 @@ void force_flush_all(void)
 		vma = vma->vm_next;
 	}
 }
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: linux-snps-arc


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan at kernel.org
>>>> Cc: pantin at google.com
>>>> Cc: hughd at google.com
>>>> Cc: lokeshgidra at google.com
>>>> Cc: dancol at google.com
>>>> Cc: mhocko at kernel.org
>>>> Cc: kirill at shutemov.name
>>>> Cc: akpm at linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Incremental-fixes-to-the-mmremap-patch.patch
Type: text/x-patch
Size: 1812 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-snps-arc/attachments/20181012/f5d1a300/attachment.bin>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.


[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch, Size: 1812 bytes --]

From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Fri, 12 Oct 2018 17:24:10 +0100
Subject: [PATCH] Incremental fixes to the mmremap patch

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/pgalloc.h | 4 ++--
 arch/um/include/asm/pgtable.h | 3 +++
 arch/um/kernel/tlb.c          | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 7485398d0737..1692da55e63a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,4 +359,7 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+
 #endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 763d35bdda01..d17b74184ba0 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -647,3 +647,9 @@ void force_flush_all(void)
 		vma = vma->vm_next;
 	}
 }
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
-- 
2.11.0


[-- Attachment #3: Type: text/plain, Size: 169 bytes --]

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

^ permalink raw reply related	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: openrisc


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared to the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan at kernel.org
>>>> Cc: pantin at google.com
>>>> Cc: hughd at google.com
>>>> Cc: lokeshgidra at google.com
>>>> Cc: dancol at google.com
>>>> Cc: mhocko at kernel.org
>>>> Cc: kirill at shutemov.name
>>>> Cc: akpm at linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have released it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch-specific code. Unless I am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The idea is to move the page table to the new pmd slot. It has nothing to do
>> with the address passed to set_pmd_at().
>
> If that is its only function, then I am going to appropriate the code 
> out of the MIPS tree for further UML testing. It does exactly that - 
> just moves the pmd to the new slot.
>
>>
> A.


A.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Incremental-fixes-to-the-mmremap-patch.patch
Type: text/x-patch
Size: 1812 bytes
Desc: not available
URL: <http://lists.librecores.org/pipermail/openrisc/attachments/20181012/f5d1a300/attachment-0001.bin>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:42           ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]


On 10/12/18 3:48 PM, Anton Ivanov wrote:
> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>> Android needs to mremap large regions of memory during memory 
>>>> management
>>>> related operations. The mremap system call can be really slow if 
>>>> THP is
>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>> pte at a time, and can be really slow across a large map. Turning 
>>>> on THP
>>>> may not be a viable option, and is not for us. This patch speeds up 
>>>> the
>>>> performance for non-THP systems by copying at the PMD level when 
>>>> possible.
>>>>
>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>> completion time drops from 160-250 milliseconds to 380-400 
>>>> microseconds.
>>>>
>>>> Before:
>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>
>>>> After:
>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>
>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>> tlb every time we do this optimization since I couldn't find a way to
>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>> doing so is not much compared the improvement, on both x86-64 and 
>>>> arm64.
>>>>
>>>> Cc: minchan@kernel.org
>>>> Cc: pantin@google.com
>>>> Cc: hughd@google.com
>>>> Cc: lokeshgidra@google.com
>>>> Cc: dancol@google.com
>>>> Cc: mhocko@kernel.org
>>>> Cc: kirill@shutemov.name
>>>> Cc: akpm@linux-foundation.org
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>> ---
>>>>    mm/mremap.c | 62 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 62 insertions(+)
>>>>
>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>> --- a/mm/mremap.c
>>>> +++ b/mm/mremap.c
>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct 
>>>> *vma, pmd_t *old_pmd,
>>>>            drop_rmap_locks(vma);
>>>>    }
>>>> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned 
>>>> long old_addr,
>>>> +          unsigned long new_addr, unsigned long old_end,
>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>> +{
>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>> +
>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>> +        || old_end - old_addr < PMD_SIZE)
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>> +     * should have release it.
>>>> +     */
>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>> +        return false;
>>>> +
>>>> +    /*
>>>> +     * We don't have to worry about the ordering of src and dst
>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>> +     */
>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>> +    if (old_ptl) {
>>>> +        pmd_t pmd;
>>>> +
>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>> +        if (new_ptl != old_ptl)
>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>> +
>>>> +        /* Clear the pmd */
>>>> +        pmd = *old_pmd;
>>>> +        pmd_clear(old_pmd);
>>>> +
>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>> +
>>>> +        /* Set the new pmd */
>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>> UML does not have set_pmd_at at all
>> Every architecture does. :)
>
> I tried to build it patching vs 4.19-rc before I made this statement 
> and ran into that.
>
> Presently it does not.
>
> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is 
> not on the list.

Once this problem as well as the omissions in the include changes for 
UML in patch one have been fixed it appears to be working.

What it needs is attached.


>
>>
>> But it may come not from the arch code.
>
> There is no generic definition as far as I can see. All 12 defines in 
> 4.19 are in arch specific code. Unless i am missing something...
>
>>
>>> If I read the code right, MIPS completely ignores the address 
>>> argument so
>>> set_pmd_at there may not have the effect which this patch is trying to
>>> achieve.
>> Ignoring address is fine. Most architectures do that..
>> The ideas is to move page table to the new pmd slot. It's nothing to do
>> with the address passed to set_pmd_at().
>
> If that is it's only function, then I am going to appropriate the code 
> out of the MIPS tree for further uml testing. It does exactly that - 
> just move the pmd the new slot.
>
>>
> A.


A.


[-- Attachment #2: 0001-Incremental-fixes-to-the-mmremap-patch.patch --]
[-- Type: text/x-patch, Size: 1811 bytes --]

From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Fri, 12 Oct 2018 17:24:10 +0100
Subject: [PATCH] Incremental fixes to the mmremap patch

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/pgalloc.h | 4 ++--
 arch/um/include/asm/pgtable.h | 3 +++
 arch/um/kernel/tlb.c          | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 7485398d0737..1692da55e63a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,4 +359,7 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+
 #endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 763d35bdda01..d17b74184ba0 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -647,3 +647,9 @@ void force_flush_all(void)
 		vma = vma->vm_next;
 	}
 }
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12 16:38       ` Julia Lawall
                           ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 16:46         ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: Julia Lawall
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel
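
(For illustration of what the rule above does - these lines are an example, not
part of the series - the disjunction drops the trailing parameter from both the
two- and three-argument prototype forms, so a declaration such as

    pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);

is rewritten to

    pte_t *pte_alloc_one_kernel(struct mm_struct *mm);

while a three-argument variant keeps its first two parameters.)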

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: Julia Lawall
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: Julia Lawall
  Cc: Anton Ivanov, linux-kernel, linux-mips, Rich Felker, linux-ia64,
	linux-sh, Peter Zijlstra, Catalin Marinas, Dave Hansen,
	Will Deacon, Michal Hocko, linux-mm, lokeshgidra, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	sparclinux, linux-xtensa, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, linux-m68k, openrisc, Borislav Petkov,
	Andy Lutomirski, nios2-dev, kirill, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, Richard Weinberger,
	linux-parisc, pantin, Max Filippov, minchan, Thomas Gleixner,
	linux-alpha, Ley Foon Tan, akpm, linuxppc-dev, David S. Miller

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: Julia Lawall
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov, Ingo Molnar,
	kvmarm, Geert Uytterhoeven, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, openrisc, Borislav Petkov, Andy Lutomirski,
	Andrey Ryabinin, kirill, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, linux-parisc, pantin,
	linux-kernel, Fenghua Yu, minchan, Thomas Gleixner,
	Richard Weinberger, nios2-dev, akpm, linuxppc-dev,
	David S. Miller, Ley Foon Tan

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: Julia Lawall
  Cc: linux-mips, linux-m68k, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	Michal Hocko, linux-mm, lokeshgidra, sparclinux, linux-riscv,
	elfring, Jonas Bonn, linux-s390, dancol, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, Anton Ivanov, Ingo Molnar,
	kvmarm, Geert Uytterhoeven, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, openrisc, Borislav Petkov, Andy Lutomirski,
	Andrey Ryabinin, kirill, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, linux-parisc, pantin,
	linux-kernel, Fenghua Yu, minchan, Thomas Gleixner,
	Richard Weinberger, nios2-dev, akpm, linuxppc-dev,
	David S. Miller, Ley Foon Tan

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018@06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 16:46         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:46 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 06:38:57PM +0200, Julia Lawall wrote:
> > I wrote something like this as below but it failed to compile, Julia any
> > suggestions on how to express this?
> >
> > @pte_alloc_func_proto depends on patch exists@
> > type T1, T2, T3, T4;
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> > @@
> >
> > (
> > - T3 fn(T1, T2);
> > + T3 fn(T1);
> > |
> > - T3 fn(T1, T2, T4);
> > + T3 fn(T1, T2);
> > )
> 
> What goes wrong?  It seems fine to me.

Weird, it seems to be working now. I could swear it wasn't five minutes ago, and
I did give a unique rule name. I don't know what I missed.

Anyway, thank you for all the quick responses and the help!

- Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 16:42           ` Anton Ivanov
                               ` (7 preceding siblings ...)
  (?)
@ 2018-10-12 16:50             ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things for
me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel
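
As an aside, for anyone wanting to reproduce the quoted 1GB timings: a small
user-space harness along the following lines exercises the same path. This is an
assumed methodology, not the exact test used for the numbers above; the 2MB
alignment and the MREMAP_FIXED destination are there so the PMD-level copy can
kick in on x86-64.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/mman.h>

#define SIZE	(1UL << 30)	/* 1 GB */
#define PMD_SZ	(1UL << 21)	/* 2 MB, PMD size on x86-64 */

int main(void)
{
	struct timespec t0, t1;
	void *arena, *src, *dst;

	/* Reserve enough address space that source and destination can both
	 * be PMD aligned and non-overlapping. */
	arena = mmap(NULL, 2 * SIZE + 2 * PMD_SZ, PROT_NONE,
		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (arena == MAP_FAILED)
		return 1;
	src = (void *)(((unsigned long)arena + PMD_SZ - 1) & ~(PMD_SZ - 1));

	/* Populate the source so there are page tables to move. */
	if (mprotect(src, SIZE, PROT_READ | PROT_WRITE))
		return 1;
	memset(src, 1, SIZE);

	clock_gettime(CLOCK_MONOTONIC, &t0);
	dst = mremap(src, SIZE, SIZE, MREMAP_MAYMOVE | MREMAP_FIXED,
		     (char *)src + SIZE);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	if (dst == MAP_FAILED)
		return 1;

	printf("Total mremap time for 1GB data: %ld nanoseconds.\n",
	       (long)((t1.tv_sec - t0.tv_sec) * 1000000000L +
		      (t1.tv_nsec - t0.tv_nsec)));
	return 0;
}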

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things for
me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: Kirill A. Shutemov, linux-kernel, linux-mips, Rich Felker,
	linux-ia64, linux-sh, Peter Zijlstra, Catalin Marinas,
	Dave Hansen, Will Deacon, mhocko, linux-mm, lokeshgidra,
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things for
me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan at kernel.org
> > > > > Cc: pantin at google.com
> > > > > Cc: hughd at google.com
> > > > > Cc: lokeshgidra at google.com
> > > > > Cc: dancol at google.com
> > > > > Cc: mhocko at kernel.org
> > > > > Cc: kirill at shutemov.name
> > > > > Cc: akpm at linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things for
me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan,
	Kirill A. Shutemov, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, linux-parisc, pantin,
	linux-kernel, Fenghua Yu, minchan, Thomas Gleixner,
	Richard Weinberger, nios2-dev, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things
for me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel



^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: Kirill A. Shutemov, linux-kernel, linux-mips, Rich Felker,
	linux-ia64, linux-sh, Peter Zijlstra, Catalin Marinas,
	Dave Hansen, Will Deacon, mhocko, linux-mm, lokeshgidra,
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things
for me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan,
	Kirill A. Shutemov, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, linux-parisc, pantin,
	linux-kernel, Fenghua Yu, minchan, Thomas Gleixner,
	Richard Weinberger, nios2-dev, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things
for me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan at kernel.org
> > > > > Cc: pantin at google.com
> > > > > Cc: hughd at google.com
> > > > > Cc: lokeshgidra at google.com
> > > > > Cc: dancol at google.com
> > > > > Cc: mhocko at kernel.org
> > > > > Cc: kirill at shutemov.name
> > > > > Cc: akpm at linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov at cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things
for me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan at kernel.org
> > > > > Cc: pantin at google.com
> > > > > Cc: hughd at google.com
> > > > > Cc: lokeshgidra at google.com
> > > > > Cc: dancol at google.com
> > > > > Cc: mhocko at kernel.org
> > > > > Cc: kirill at shutemov.name
> > > > > Cc: akpm at linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things
for me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:50             ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:50 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: Kirill A. Shutemov, linux-kernel, linux-mips, Rich Felker,
	linux-ia64, linux-sh, Peter Zijlstra, Catalin Marinas,
	Dave Hansen, Will Deacon, mhocko, linux-mm, lokeshgidra,
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.
> 
> 
> > 
> > > 
> > > But it may come not from the arch code.
> > 
> > There is no generic definition as far as I can see. All 12 defines in
> > 4.19 are in arch specific code. Unless i am missing something...
> > 
> > > 
> > > > If I read the code right, MIPS completely ignores the address
> > > > argument so
> > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > achieve.
> > > Ignoring address is fine. Most architectures do that..
> > > The ideas is to move page table to the new pmd slot. It's nothing to do
> > > with the address passed to set_pmd_at().
> > 
> > If that is it's only function, then I am going to appropriate the code
> > out of the MIPS tree for further uml testing. It does exactly that -
> > just move the pmd the new slot.
> > 
> > > 
> > A.
> 
> 
> A.
> 

> From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> Date: Fri, 12 Oct 2018 17:24:10 +0100
> Subject: [PATCH] Incremental fixes to the mmremap patch
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/include/asm/pgalloc.h | 4 ++--
>  arch/um/include/asm/pgtable.h | 3 +++
>  arch/um/kernel/tlb.c          | 6 ++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> index bf90b2aa2002..99eb5682792a 100644
> --- a/arch/um/include/asm/pgalloc.h
> +++ b/arch/um/include/asm/pgalloc.h
> @@ -25,8 +25,8 @@
>  extern pgd_t *pgd_alloc(struct mm_struct *);
>  extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>  
> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> +extern pgtable_t pte_alloc_one(struct mm_struct *);

If it's OK, let me handle this bit, since otherwise it complicates things
for me.

>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index 7485398d0737..1692da55e63a 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -359,4 +359,7 @@ do {						\
>  	__flush_tlb_one((vaddr));		\
>  } while (0)
>  
> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd);
> +
>  #endif
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 763d35bdda01..d17b74184ba0 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -647,3 +647,9 @@ void force_flush_all(void)
>  		vma = vma->vm_next;
>  	}
>  }
> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> +		pmd_t *pmdp, pmd_t pmd)
> +{
> +	*pmdp = pmd;
> +}
> +

I believe this should be included in a separate patch since it is not related
specifically to pte_alloc argument removal. If you want, I could split it
into a separate patch for my series with you as author.

thanks,

- Joel



^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 13:19         ` Kirill A. Shutemov
                             ` (5 preceding siblings ...)
  (?)
@ 2018-10-12 16:57           ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:57 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 04:19:46PM +0300, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:50:46AM -0700, Joel Fernandes wrote:
> > On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> > > On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > > > Android needs to mremap large regions of memory during memory management
> > > > related operations. The mremap system call can be really slow if THP is
> > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > pte at a time, and can be really slow across a large map. Turning on THP
> > > > may not be a viable option, and is not for us. This patch speeds up the
> > > > performance for non-THP system by copying at the PMD level when possible.
> > > > 
> > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > > > 
> > > > Before:
> > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > 
> > > > After:
> > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > 
> > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > tlb every time we do this optimization since I couldn't find a way to
> > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > doing so is not much compared the improvement, on both x86-64 and arm64.
> > > 
> > > I looked into the code more and noticed move_pte() helper called from
> > > move_ptes(). It changes PTE entry to suite new address.
> > > 
> > > It is only defined in non-trivial way on Sparc. I don't know much about
> > > Sparc and it's hard for me to say if the optimization will break anything
> > > there.
> > 
> > Sparc's move_pte seems to be flushing the D-cache to prevent aliasing. It is
> > not modifying the PTE itself AFAICS:
> > 
> > #ifdef DCACHE_ALIASING_POSSIBLE
> > #define __HAVE_ARCH_MOVE_PTE
> > #define move_pte(pte, prot, old_addr, new_addr)                         \
> > ({                                                                      \
> >         pte_t newpte = (pte);                                           \
> >         if (tlb_type != hypervisor && pte_present(pte)) {               \
> >                 unsigned long this_pfn = pte_pfn(pte);                  \
> >                                                                         \
> >                 if (pfn_valid(this_pfn) &&                              \
> >                     (((old_addr) ^ (new_addr)) & (1 << 13)))            \
> >                         flush_dcache_page_all(current->mm,              \
> >                                               pfn_to_page(this_pfn));   \
> >         }                                                               \
> >         newpte;                                                         \
> > })
> > #endif
> > 
> > If its an issue, then how do transparent huge pages work on Sparc?  I don't
> > see the huge page code (move_huge_pages) during mremap doing anything special
> > for Sparc architecture when moving PMDs..
> 
> My *guess* is that it will work fine on Sparc as it apprarently it only
> cares about change in bit 13 of virtual address. It will never happen for
> huge pages or when PTE page tables move.
> 
> But I just realized that the problem is bigger: since we pass new_addr to
> the set_pte_at() we would need to audit all implementations that they are
> safe with just moving PTE page table.
> 
> I would rather go with per-architecture enabling. It's much safer.

I'm OK with the per-arch enabling; I agree it's safer. So I should be adding
a new __HAVE_ARCH_MOVE_PMD, right, or did you have a better name for that?
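
To make that concrete, here is a rough sketch of what the opt-in could look
like in move_page_tables(), using the __HAVE_ARCH_MOVE_PMD name suggested
above. The define does not exist today and the placement/condition is only
illustrative; 'extent' and 'need_flush' are the existing locals of
move_page_tables(), and move_normal_pmd() is the helper added by this patch:

#ifdef __HAVE_ARCH_MOVE_PMD
		/*
		 * Illustrative only: attempt the PMD-level move when a whole
		 * aligned PMD's worth of space is being moved; otherwise fall
		 * through to the existing move_ptes() path.
		 */
		if (extent == PMD_SIZE &&
		    move_normal_pmd(vma, old_addr, new_addr, old_end,
				    old_pmd, new_pmd, &need_flush))
			continue;
#endif

An architecture would define __HAVE_ARCH_MOVE_PMD only after auditing that
its set_pmd_at() does nothing address-dependent.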

Also, do you feel we still need to remove the address argument from
pte_alloc? Or should we leave that alone if we do per-arch?
I figure I have already spent a bunch of time on that, and it's a cleanup
anyway, so we may as well do it. But perhaps that "pte_alloc cleanup" can
then be a separate patch, independent of this series?

> > Also, do we not flush the caches from any path when we munmap address space?
> > We do call do_munmap on the old mapping from mremap after moving to the new one.
> 
> Are you sure about that? It can be hided deeper in architecture-specific
> code.

I am sure we do call do_munmap; I was asking whether we flush the caches as
well. If we're enabling this per architecture, then I guess it does not
matter for the purposes of this patch.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:57           ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 16:57 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 04:19:46PM +0300, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:50:46AM -0700, Joel Fernandes wrote:
> > On Fri, Oct 12, 2018 at 02:30:56PM +0300, Kirill A. Shutemov wrote:
> > > On Thu, Oct 11, 2018 at 06:37:56PM -0700, Joel Fernandes (Google) wrote:
> > > > Android needs to mremap large regions of memory during memory management
> > > > related operations. The mremap system call can be really slow if THP is
> > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > pte at a time, and can be really slow across a large map. Turning on THP
> > > > may not be a viable option, and is not for us. This patch speeds up the
> > > > performance for non-THP system by copying at the PMD level when possible.
> > > > 
> > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > > > 
> > > > Before:
> > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > 
> > > > After:
> > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > 
> > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > tlb every time we do this optimization since I couldn't find a way to
> > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > doing so is not much compared the improvement, on both x86-64 and arm64.
> > > 
> > > I looked into the code more and noticed move_pte() helper called from
> > > move_ptes(). It changes PTE entry to suite new address.
> > > 
> > > It is only defined in non-trivial way on Sparc. I don't know much about
> > > Sparc and it's hard for me to say if the optimization will break anything
> > > there.
> > 
> > Sparc's move_pte seems to be flushing the D-cache to prevent aliasing. It is
> > not modifying the PTE itself AFAICS:
> > 
> > #ifdef DCACHE_ALIASING_POSSIBLE
> > #define __HAVE_ARCH_MOVE_PTE
> > #define move_pte(pte, prot, old_addr, new_addr)                         \
> > ({                                                                      \
> >         pte_t newpte = (pte);                                           \
> >         if (tlb_type != hypervisor && pte_present(pte)) {               \
> >                 unsigned long this_pfn = pte_pfn(pte);                  \
> >                                                                         \
> >                 if (pfn_valid(this_pfn) &&                              \
> >                     (((old_addr) ^ (new_addr)) & (1 << 13)))            \
> >                         flush_dcache_page_all(current->mm,              \
> >                                               pfn_to_page(this_pfn));   \
> >         }                                                               \
> >         newpte;                                                         \
> > })
> > #endif
> > 
> > If its an issue, then how do transparent huge pages work on Sparc?  I don't
> > see the huge page code (move_huge_pages) during mremap doing anything special
> > for Sparc architecture when moving PMDs..
> 
> My *guess* is that it will work fine on Sparc as it apprarently it only
> cares about change in bit 13 of virtual address. It will never happen for
> huge pages or when PTE page tables move.
> 
> But I just realized that the problem is bigger: since we pass new_addr to
> the set_pte_at() we would need to audit all implementations that they are
> safe with just moving PTE page table.
> 
> I would rather go with per-architecture enabling. It's much safer.

I'm OK with the per-arch enabling, I agree it's safer. So I should be adding
a new __HAVE_ARCH_MOVE_PMD then, or did you have a better name for that?

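Something like the untested sketch below is what I have in mind - __HAVE_ARCH_MOVE_PMD
is just a placeholder name and the mm/mremap.c call site is approximated from memory,
so please don't read it as the final form:

/* arch/<arch>/include/asm/pgtable.h: an architecture opts in explicitly */
#define __HAVE_ARCH_MOVE_PMD

/* mm/mremap.c, inside the move_page_tables() loop (approximate context) */
#ifdef __HAVE_ARCH_MOVE_PMD
	if (extent == PMD_SIZE &&
	    move_normal_pmd(vma, old_addr, new_addr, old_end,
			    old_pmd, new_pmd, &need_flush))
		continue;	/* moved a whole page-table page, skip the PTE loop */
#endif
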
Also, do you feel we should still remove the address argument from
pte_alloc? Or should we leave that alone if we do per-arch?
I figure I already spent a bunch of time on that, and it's a worthwhile cleanup
in its own right, so we may as well do it. But perhaps that "pte_alloc cleanup" can then
be a separate patch, independent of this series?
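
(For reference, that cleanup is just dropping the now-unused argument from the
prototypes and definitions, roughly along these lines for each architecture:

-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *);

plus the matching changes at the call sites.)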

> > Also, do we not flush the caches from any path when we munmap address space?
> > We do call do_munmap on the old mapping from mremap after moving to the new one.
> 
> Are you sure about that? It can be hidden deeper in architecture-specific
> code.

I am sure we do call do_munmap; I was asking whether we flush the caches as well.
If we're enabling this per architecture, then I guess it does not matter for
the purposes of this patch.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion time drops from 160-250 milliseconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>>>> TLB every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared to the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it by patching against 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem and the omissions in the include changes for UML in
>> patch one have been fixed, it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come from somewhere other than the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch-specific code. Unless I am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring the address is fine. Most architectures do that.
>>>> The idea is to move the page table to the new pmd slot. It has nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is its only function, then I am going to appropriate the code
>>> out of the MIPS tree for further UML testing. It does exactly that -
>>> just moves the pmd to the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If it's OK, let me handle this bit, since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - TLB flushes are extremely expensive on UML.

I have lifted the definition of set_pmd_at from the MIPS tree and
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control, so
set_pmd_at does not need to force a flush every time; that is done
further up the chain.
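
Roughly, the flush control I am referring to (paraphrasing the mremap patch as I
read it, not the exact code) looks like:

	/*
	 * The caller remembers that something was moved and issues one ranged
	 * flush at the end, so set_pmd_at() itself never has to force a flush.
	 */
	if (need_flush)
		flush_tlb_range(vma, old_end - len, old_addr);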

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Kirill A. Shutemov, linux-kernel, linux-mips, Rich Felker,
	linux-ia64, linux-sh, Peter Zijlstra, Catalin Marinas,
	Dave Hansen, Will Deacon, mhocko, linux-mm, lokeshgidra,
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during memory
>>>>>> management related operations. The mremap system call can be really
>>>>>> slow if THP is not enabled. The bottleneck is move_page_tables, which
>>>>>> is copying each pte at a time, and can be really slow across a large
>>>>>> map. Turning on THP may not be a viable option, and is not for us.
>>>>>> This patch speeds up the performance for non-THP systems by copying
>>>>>> at the PMD level when possible.
>>>>>>
>>>>>> The speedup is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion time drops from 160-250 milliseconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> In case THP is enabled, the optimization is skipped. I also flush the
>>>>>> TLB every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is small compared to the improvement, on both x86-64 and
>>>>>> arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have released it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it with the patch applied against 4.19-rc before I made
>>> this statement and ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem, as well as the omissions in the include changes for UML
>> in patch one, have been fixed, it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come from outside the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch-specific code. Unless I am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring the address is fine. Most architectures do that.
>>>> The idea is to move the page table to the new pmd slot. It has nothing
>>>> to do with the address passed to set_pmd_at().
>>> If that is its only function, then I am going to appropriate the code
>>> out of the MIPS tree for further UML testing. It does exactly that -
>>> just moves the pmd to the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If it's OK, let me handle this bit, since otherwise it complicates things
> for me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - TLB flush is extremely expensive on UML.

I have lifted the definition of set_pmd_at from the MIPS tree and
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control, so
set_pmd_at does not need to force a flush every time. That flush is done
further up the chain.
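
For reference, this is roughly how I read the flush control in the quoted
patch (simplified sketch based on the quoted hunks, not a verbatim excerpt;
exact arguments may differ): move_normal_pmd() only marks that a flush is
needed, and move_page_tables() does a single ranged flush at the end:

	/* Sketch of the caller side in mm/mremap.c (simplified) */
	moved = move_normal_pmd(vma, old_addr, new_addr, old_end,
				old_pmd, new_pmd, &need_flush);

	/* ... later, once, after the whole copy loop ... */
	if (need_flush)
		flush_tlb_range(vma, old_end - len, old_addr);

so a set_pmd_at() that does no flushing of its own should be safe in this
path.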

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: linux-riscv


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan at kernel.org
>>>>>> Cc: pantin at google.com
>>>>>> Cc: hughd at google.com
>>>>>> Cc: lokeshgidra at google.com
>>>>>> Cc: dancol at google.com
>>>>>> Cc: mhocko at kernel.org
>>>>>> Cc: kirill at shutemov.name
>>>>>> Cc: akpm at linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>  ?? mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  ?? 1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>  ?????????? drop_rmap_locks(vma);
>>>>>>  ?? }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +????????? unsigned long new_addr, unsigned long old_end,
>>>>>> +????????? pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +??? spinlock_t *old_ptl, *new_ptl;
>>>>>> +??? struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +??? if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +??????? || old_end - old_addr < PMD_SIZE)
>>>>>> +??????? return false;
>>>>>> +
>>>>>> +??? /*
>>>>>> +???? * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +???? * should have release it.
>>>>>> +???? */
>>>>>> +??? if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +??????? return false;
>>>>>> +
>>>>>> +??? /*
>>>>>> +???? * We don't have to worry about the ordering of src and dst
>>>>>> +???? * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +???? */
>>>>>> +??? old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +??? if (old_ptl) {
>>>>>> +??????? pmd_t pmd;
>>>>>> +
>>>>>> +??????? new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +??????? if (new_ptl != old_ptl)
>>>>>> +??????????? spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +??????? /* Clear the pmd */
>>>>>> +??????? pmd = *old_pmd;
>>>>>> +??????? pmd_clear(old_pmd);
>>>>>> +
>>>>>> +??????? VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +??????? /* Set the new pmd */
>>>>>> +??????? set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come not from the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch specific code. Unless i am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring address is fine. Most architectures do that..
>>>> The ideas is to move page table to the new pmd slot. It's nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is it's only function, then I am going to appropriate the code
>>> out of the MIPS tree for further uml testing. It does exactly that -
>>> just move the pmd the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If its Ok, let me handle this bit since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - tlb flush is extremely expensive on uml.

I have lifted the definition of set_pmd_at from the mips tree and 
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control so 
set_pmd_at does not need to do a force flush every time. It is done 
further up the chain.

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan,
	Kirill A. Shutemov, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, linux-parisc, pantin,
	linux-kernel, Fenghua Yu, minchan, Thomas Gleixner,
	Richard Weinberger, nios2-dev, akpm, linuxppc-dev,
	David S. Miller


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come not from the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch specific code. Unless i am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring address is fine. Most architectures do that..
>>>> The ideas is to move page table to the new pmd slot. It's nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is it's only function, then I am going to appropriate the code
>>> out of the MIPS tree for further uml testing. It does exactly that -
>>> just move the pmd the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If its Ok, let me handle this bit since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - tlb flush is extremely expensive on uml.

I have lifted the definition of set_pmd_at from the mips tree and 
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control so 
set_pmd_at does not need to do a force flush every time. It is done 
further up the chain.

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Kirill A. Shutemov, linux-kernel, linux-mips, Rich Felker,
	linux-ia64, linux-sh, Peter Zijlstra, Catalin Marinas,
	Dave Hansen, Will Deacon, mhocko, linux-mm, lokeshgidra,
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>  A A  mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  A A  1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>  A A A A A A A A A A  drop_rmap_locks(vma);
>>>>>>  A A  }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +A A A A A A A A A  unsigned long new_addr, unsigned long old_end,
>>>>>> +A A A A A A A A A  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +A A A  spinlock_t *old_ptl, *new_ptl;
>>>>>> +A A A  struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +A A A  if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +A A A A A A A  || old_end - old_addr < PMD_SIZE)
>>>>>> +A A A A A A A  return false;
>>>>>> +
>>>>>> +A A A  /*
>>>>>> +A A A A  * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +A A A A  * should have release it.
>>>>>> +A A A A  */
>>>>>> +A A A  if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +A A A A A A A  return false;
>>>>>> +
>>>>>> +A A A  /*
>>>>>> +A A A A  * We don't have to worry about the ordering of src and dst
>>>>>> +A A A A  * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +A A A A  */
>>>>>> +A A A  old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +A A A  if (old_ptl) {
>>>>>> +A A A A A A A  pmd_t pmd;
>>>>>> +
>>>>>> +A A A A A A A  new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +A A A A A A A  if (new_ptl != old_ptl)
>>>>>> +A A A A A A A A A A A  spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +A A A A A A A  /* Clear the pmd */
>>>>>> +A A A A A A A  pmd = *old_pmd;
>>>>>> +A A A A A A A  pmd_clear(old_pmd);
>>>>>> +
>>>>>> +A A A A A A A  VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +A A A A A A A  /* Set the new pmd */
>>>>>> +A A A A A A A  set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come not from the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch specific code. Unless i am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring address is fine. Most architectures do that..
>>>> The ideas is to move page table to the new pmd slot. It's nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is it's only function, then I am going to appropriate the code
>>> out of the MIPS tree for further uml testing. It does exactly that -
>>> just move the pmd the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If its Ok, let me handle this bit since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - tlb flush is extremely expensive on uml.

I have lifted the definition of set_pmd_at from the mips tree and 
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control so 
set_pmd_at does not need to do a force flush every time. It is done 
further up the chain.

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, Ley Foon Tan,
	Kirill A. Shutemov, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, linux-parisc, pantin,
	linux-kernel, Fenghua Yu, minchan, Thomas Gleixner,
	Richard Weinberger, nios2-dev, akpm, linuxppc-dev,
	David S. Miller


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come not from the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch specific code. Unless i am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring address is fine. Most architectures do that..
>>>> The ideas is to move page table to the new pmd slot. It's nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is it's only function, then I am going to appropriate the code
>>> out of the MIPS tree for further uml testing. It does exactly that -
>>> just move the pmd the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If its Ok, let me handle this bit since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - tlb flush is extremely expensive on uml.

I have lifted the definition of set_pmd_at from the mips tree and 
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control so 
set_pmd_at does not need to do a force flush every time. It is done 
further up the chain.

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: linux-snps-arc


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018@05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018@03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan at kernel.org
>>>>>> Cc: pantin at google.com
>>>>>> Cc: hughd at google.com
>>>>>> Cc: lokeshgidra at google.com
>>>>>> Cc: dancol at google.com
>>>>>> Cc: mhocko at kernel.org
>>>>>> Cc: kirill at shutemov.name
>>>>>> Cc: akpm at linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
>>>>>> ---
>>>>>>  ?? mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  ?? 1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>  ?????????? drop_rmap_locks(vma);
>>>>>>  ?? }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +????????? unsigned long new_addr, unsigned long old_end,
>>>>>> +????????? pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +??? spinlock_t *old_ptl, *new_ptl;
>>>>>> +??? struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +??? if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +??????? || old_end - old_addr < PMD_SIZE)
>>>>>> +??????? return false;
>>>>>> +
>>>>>> +??? /*
>>>>>> +???? * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +???? * should have release it.
>>>>>> +???? */
>>>>>> +??? if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +??????? return false;
>>>>>> +
>>>>>> +??? /*
>>>>>> +???? * We don't have to worry about the ordering of src and dst
>>>>>> +???? * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +???? */
>>>>>> +??? old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +??? if (old_ptl) {
>>>>>> +??????? pmd_t pmd;
>>>>>> +
>>>>>> +??????? new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +??????? if (new_ptl != old_ptl)
>>>>>> +??????????? spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +??????? /* Clear the pmd */
>>>>>> +??????? pmd = *old_pmd;
>>>>>> +??????? pmd_clear(old_pmd);
>>>>>> +
>>>>>> +??????? VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +??????? /* Set the new pmd */
>>>>>> +??????? set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come not from the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch specific code. Unless i am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring address is fine. Most architectures do that..
>>>> The ideas is to move page table to the new pmd slot. It's nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is it's only function, then I am going to appropriate the code
>>> out of the MIPS tree for further uml testing. It does exactly that -
>>> just move the pmd the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov at cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If its Ok, let me handle this bit since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - tlb flush is extremely expensive on uml.

I have lifted the definition of set_pmd_at from the mips tree and 
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control so 
set_pmd_at does not need to do a force flush every time. It is done 
further up the chain.

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 16:58               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-12 16:58 UTC (permalink / raw)
  To: openrisc


On 10/12/18 5:50 PM, Joel Fernandes wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan at kernel.org
>>>>>> Cc: pantin at google.com
>>>>>> Cc: hughd at google.com
>>>>>> Cc: lokeshgidra at google.com
>>>>>> Cc: dancol at google.com
>>>>>> Cc: mhocko at kernel.org
>>>>>> Cc: kirill at shutemov.name
>>>>>> Cc: akpm at linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
>>
>>
>>>> But it may come not from the arch code.
>>> There is no generic definition as far as I can see. All 12 defines in
>>> 4.19 are in arch specific code. Unless i am missing something...
>>>
>>>>> If I read the code right, MIPS completely ignores the address
>>>>> argument so
>>>>> set_pmd_at there may not have the effect which this patch is trying to
>>>>> achieve.
>>>> Ignoring address is fine. Most architectures do that..
>>>> The ideas is to move page table to the new pmd slot. It's nothing to do
>>>> with the address passed to set_pmd_at().
>>> If that is it's only function, then I am going to appropriate the code
>>> out of the MIPS tree for further uml testing. It does exactly that -
>>> just move the pmd the new slot.
>>>
>>> A.
>>
>> A.
>>
>>  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> Date: Fri, 12 Oct 2018 17:24:10 +0100
>> Subject: [PATCH] Incremental fixes to the mmremap patch
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/pgalloc.h | 4 ++--
>>   arch/um/include/asm/pgtable.h | 3 +++
>>   arch/um/kernel/tlb.c          | 6 ++++++
>>   3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
>> index bf90b2aa2002..99eb5682792a 100644
>> --- a/arch/um/include/asm/pgalloc.h
>> +++ b/arch/um/include/asm/pgalloc.h
>> @@ -25,8 +25,8 @@
>>   extern pgd_t *pgd_alloc(struct mm_struct *);
>>   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
>>   
>> -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
>> -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
>> +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
>> +extern pgtable_t pte_alloc_one(struct mm_struct *);
> If it's OK, let me handle this bit, since otherwise it complicates things for
> me.
>
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>> index 7485398d0737..1692da55e63a 100644
>> --- a/arch/um/include/asm/pgtable.h
>> +++ b/arch/um/include/asm/pgtable.h
>> @@ -359,4 +359,7 @@ do {						\
>>   	__flush_tlb_one((vaddr));		\
>>   } while (0)
>>   
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd);
>> +
>>   #endif
>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>> index 763d35bdda01..d17b74184ba0 100644
>> --- a/arch/um/kernel/tlb.c
>> +++ b/arch/um/kernel/tlb.c
>> @@ -647,3 +647,9 @@ void force_flush_all(void)
>>   		vma = vma->vm_next;
>>   	}
>>   }
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	*pmdp = pmd;
>> +}
>> +
> I believe this should be included in a separate patch since it is not related
> specifically to pte_alloc argument removal. If you want, I could split it
> into a separate patch for my series with you as author.


Whichever is more convenient for you.

One thing to note - TLB flush is extremely expensive on UML.

I have lifted the definition of set_pmd_at from the MIPS tree and
removed the tlb_flush_all from it for this exact reason.

If I read the original patch correctly, it does its own flush control, so
set_pmd_at does not need to do a forced flush every time. It is done
further up the chain.
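
For context, a minimal sketch of that caller-side flush control, modelled on
the quoted patch rather than the literal mm/mremap.c code (the surrounding
loop and variable names here are assumptions):

	/*
	 * Sketch: the PMD-level move only records that a flush is needed;
	 * the caller issues one ranged TLB flush after everything has moved.
	 */
	bool need_flush = false;

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		/* ... look up old_pmd/new_pmd for this extent ... */
		if (extent == PMD_SIZE &&
		    move_normal_pmd(vma, old_addr, new_addr, old_end,
				    old_pmd, new_pmd, &need_flush))
			continue;	/* whole page table moved in one go */

		/* otherwise fall back to copying PTEs individually */
		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
			  new_vma, new_pmd, new_addr, need_rmap_locks,
			  &need_flush);
	}

	if (need_flush)
		flush_tlb_range(vma, old_end - len, old_end);

So on UML the expensive flush happens at most once per mremap() call, not once
per set_pmd_at().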

Brgds,

A.


>
> thanks,
>
> - Joel
>
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 16:58               ` Anton Ivanov
@ 2018-10-12 17:06                 ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 17:06 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, Max Filippov, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert

On Fri, Oct 12, 2018 at 05:58:40PM +0100, Anton Ivanov wrote:
[...]
> > > > > > If I read the code right, MIPS completely ignores the address
> > > > > > argument so
> > > > > > set_pmd_at there may not have the effect which this patch is trying to
> > > > > > achieve.
> > > > > Ignoring address is fine. Most architectures do that.
> > > > > The idea is to move the page table to the new pmd slot. It has nothing to do
> > > > > with the address passed to set_pmd_at().
> > > > If that is its only function, then I am going to appropriate the code
> > > > out of the MIPS tree for further UML testing. It does exactly that -
> > > > just move the pmd to the new slot.
> > > > 
> > > > A.
> > > 
> > > A.
> > > 
> > >  From ac265d96897a346b05646fce91784ed4922c7f8d Mon Sep 17 00:00:00 2001
> > > From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> > > Date: Fri, 12 Oct 2018 17:24:10 +0100
> > > Subject: [PATCH] Incremental fixes to the mmremap patch
> > > 
> > > Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> > > ---
> > >   arch/um/include/asm/pgalloc.h | 4 ++--
> > >   arch/um/include/asm/pgtable.h | 3 +++
> > >   arch/um/kernel/tlb.c          | 6 ++++++
> > >   3 files changed, 11 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
> > > index bf90b2aa2002..99eb5682792a 100644
> > > --- a/arch/um/include/asm/pgalloc.h
> > > +++ b/arch/um/include/asm/pgalloc.h
> > > @@ -25,8 +25,8 @@
> > >   extern pgd_t *pgd_alloc(struct mm_struct *);
> > >   extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
> > > -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
> > > -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
> > > +extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
> > > +extern pgtable_t pte_alloc_one(struct mm_struct *);
> > If it's OK, let me handle this bit, since otherwise it complicates things for
> > me.
> > 
> > >   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> > >   {
> > > diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> > > index 7485398d0737..1692da55e63a 100644
> > > --- a/arch/um/include/asm/pgtable.h
> > > +++ b/arch/um/include/asm/pgtable.h
> > > @@ -359,4 +359,7 @@ do {						\
> > >   	__flush_tlb_one((vaddr));		\
> > >   } while (0)
> > > +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> > > +		pmd_t *pmdp, pmd_t pmd);
> > > +
> > >   #endif
> > > diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> > > index 763d35bdda01..d17b74184ba0 100644
> > > --- a/arch/um/kernel/tlb.c
> > > +++ b/arch/um/kernel/tlb.c
> > > @@ -647,3 +647,9 @@ void force_flush_all(void)
> > >   		vma = vma->vm_next;
> > >   	}
> > >   }
> > > +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> > > +		pmd_t *pmdp, pmd_t pmd)
> > > +{
> > > +	*pmdp = pmd;
> > > +}
> > > +
> > I believe this should be included in a separate patch since it is not related
> > specifically to pte_alloc argument removal. If you want, I could split it
> > into a separate patch for my series with you as author.
> 
> 
> Whichever is more convenient for you.

Ok.

> One thing to note - TLB flush is extremely expensive on UML.
> 
> I have lifted the definition of set_pmd_at from the MIPS tree and removed
> the tlb_flush_all from it for this exact reason.
> 
> If I read the original patch correctly, it does its own flush control, so
> set_pmd_at does not need to do a forced flush every time. It is done further
> up the chain.

That is correct. It is not done during the optimization, but is done later
after the pmds have moved.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 11:30     ` Kirill A. Shutemov
@ 2018-10-12 18:02       ` David Miller
  -1 siblings, 0 replies; 317+ messages in thread
From: David Miller @ 2018-10-12 18:02 UTC (permalink / raw)
  To: kirill
  Cc: joel, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-arm-kernel, linux-hexagon,
	linux-ia64, linux-m68k

From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Fri, 12 Oct 2018 14:30:56 +0300

> I looked into the code more and noticed the move_pte() helper called from
> move_ptes(). It changes the PTE entry to suit the new address.
> 
> It is only defined in a non-trivial way on Sparc. I don't know much about
> Sparc and it's hard for me to say if the optimization will break anything
> there.
> 
> I think it is worth disabling the optimization if __HAVE_ARCH_MOVE_PTE is
> defined. Or make architectures state explicitly that the optimization is
> safe.

What sparc is doing in move_pte() is flushing the data-cache
(synchronously) if the virtual address color of the mapping changes.
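
For readers who have not looked at the hook, a rough illustration of that
behaviour (hypothetical helper, not the actual sparc64 move_pte()):

	/*
	 * Hypothetical sketch only: when a present pte moves between virtual
	 * addresses of different D-cache colors (i.e. they differ modulo
	 * SHMLBA), flush the data cache for that page before the new mapping
	 * is used.
	 */
	static inline pte_t move_pte_sketch(pte_t pte, pgprot_t prot,
					    unsigned long old_addr,
					    unsigned long new_addr)
	{
		if (pte_present(pte) && pfn_valid(pte_pfn(pte)) &&
		    ((old_addr ^ new_addr) & (SHMLBA - 1)))
			flush_dcache_page(pfn_to_page(pte_pfn(pte)));

		return pte;
	}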

Hope this helps.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 12:50       ` Joel Fernandes
@ 2018-10-12 18:18         ` David Miller
  -1 siblings, 0 replies; 317+ messages in thread
From: David Miller @ 2018-10-12 18:18 UTC (permalink / raw)
  To: joel
  Cc: kirill, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-arm-kernel, linux-hexagon,
	linux-ia64, linux-m68k

From: Joel Fernandes <joel@joelfernandes.org>
Date: Fri, 12 Oct 2018 05:50:46 -0700

> If it's an issue, then how do transparent huge pages work on Sparc?  I don't
> see the huge page code (move_huge_pages) during mremap doing anything special
> for the Sparc architecture when moving PMDs.

This is because all huge pages are larger than SHMLBA.  So no cache flushing
necessary.

> Also, do we not flush the caches from any path when we munmap
> address space?  We do call do_munmap on the old mapping from mremap
> after moving to the new one.

Sparc makes sure that shared mappings have consistent colors.  Therefore
all that's left are private mappings and those will be initialized by
block stores to clear the page out or similar.

Also, when creating new mappings, we flush the D-cache when necessary
in update_mmu_cache().

We also maintain a bit in the page struct to track when a page which
was potentially written to on one cpu ends up mapped into another
address space, and flush as necessary.

The cache is write-through, which simplifies the preconditions we have
to maintain.
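
The "larger than SHMLBA" point above can be written as a small invariant
(illustrative only; it assumes huge pages are naturally aligned and the huge
page size is a multiple of SHMLBA):

	/*
	 * Two virtual addresses share a D-cache color when they agree modulo
	 * SHMLBA.  A naturally aligned huge page keeps the same color when it
	 * is moved to another naturally aligned slot, so no extra flush is
	 * needed for the THP case.
	 */
	static inline bool same_dcache_color(unsigned long a, unsigned long b)
	{
		return ((a ^ b) & (SHMLBA - 1)) == 0;
	}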

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 18:18         ` David Miller
  0 siblings, 0 replies; 317+ messages in thread
From: David Miller @ 2018-10-12 18:18 UTC (permalink / raw)
  To: joel
  Cc: kirill, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-arm-kernel, linux-hexagon,
	linux-ia64, linux-m68k

From: Joel Fernandes <joel@joelfernandes.org>
Date: Fri, 12 Oct 2018 05:50:46 -0700

> If its an issue, then how do transparent huge pages work on Sparc?  I don't
> see the huge page code (move_huge_pages) during mremap doing anything special
> for Sparc architecture when moving PMDs..

This is because all huge pages are larger than SHMLBA.  So no cache flushing
necessary.

> Also, do we not flush the caches from any path when we munmap
> address space?  We do call do_munmap on the old mapping from mremap
> after moving to the new one.

Sparc makes sure that shared mapping have consistent colors.  Therefore
all that's left are private mappings and those will be initialized by
block stores to clear the page out or similar.

Also, when creating new mappings, we flush the D-cache when necessary
in update_mmu_cache().

We also maintain a bit in the page struct to track when a page which
was potentially written to on one cpu ends up mapped into another
address space and flush as necessary.

The cache is write-through, which simplifies the preconditions we have
to maintain.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 18:18         ` David Miller
  0 siblings, 0 replies; 317+ messages in thread
From: David Miller @ 2018-10-12 18:18 UTC (permalink / raw)
  To: joel
  Cc: kirill, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-arm-kernel, linux-hexagon,
	linux-ia64, linux-m68k, linux-mips, linux-mm, linux-parisc,
	linuxppc-dev, linux-riscv, linux-s390, linux-sh, linux-snps-arc,
	linux-um, linux-xtensa, jcmvbkbc, nios2-dev, openrisc, peterz,
	richard

From: Joel Fernandes <joel@joelfernandes.org>
Date: Fri, 12 Oct 2018 05:50:46 -0700

> If its an issue, then how do transparent huge pages work on Sparc?  I don't
> see the huge page code (move_huge_pages) during mremap doing anything special
> for Sparc architecture when moving PMDs..

This is because all huge pages are larger than SHMLBA.  So no cache flushing
necessary.

> Also, do we not flush the caches from any path when we munmap
> address space?  We do call do_munmap on the old mapping from mremap
> after moving to the new one.

Sparc makes sure that shared mapping have consistent colors.  Therefore
all that's left are private mappings and those will be initialized by
block stores to clear the page out or similar.

Also, when creating new mappings, we flush the D-cache when necessary
in update_mmu_cache().

We also maintain a bit in the page struct to track when a page which
was potentially written to on one cpu ends up mapped into another
address space and flush as necessary.

The cache is write-through, which simplifies the preconditions we have
to maintain.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 18:18         ` David Miller
  0 siblings, 0 replies; 317+ messages in thread
From: David Miller @ 2018-10-12 18:18 UTC (permalink / raw)
  To: linux-riscv

From: Joel Fernandes <joel@joelfernandes.org>
Date: Fri, 12 Oct 2018 05:50:46 -0700

> If its an issue, then how do transparent huge pages work on Sparc?  I don't
> see the huge page code (move_huge_pages) during mremap doing anything special
> for Sparc architecture when moving PMDs..

This is because all huge pages are larger than SHMLBA.  So no cache flushing
necessary.

> Also, do we not flush the caches from any path when we munmap
> address space?  We do call do_munmap on the old mapping from mremap
> after moving to the new one.

Sparc makes sure that shared mapping have consistent colors.  Therefore
all that's left are private mappings and those will be initialized by
block stores to clear the page out or similar.

Also, when creating new mappings, we flush the D-cache when necessary
in update_mmu_cache().

We also maintain a bit in the page struct to track when a page which
was potentially written to on one cpu ends up mapped into another
address space and flush as necessary.

The cache is write-through, which simplifies the preconditions we have
to maintain.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 18:18         ` David Miller
  0 siblings, 0 replies; 317+ messages in thread
From: David Miller @ 2018-10-12 18:18 UTC (permalink / raw)
  To: joel
  Cc: linux-mips, linux-ia64, linux-sh, peterz, catalin.marinas,
	dave.hansen, mhocko, linux-mm, lokeshgidra, linux-riscv, elfring,
	jonas, linux-s390, dancol, linux-xtensa, linux-hexagon, deller,
	hughd, jejb, kasan-dev, kvmarm, mingo, geert, aryabinin,
	linux-snps-arc, kernel-team, fenghua.yu, jdike, linux-um,
	Julia.Lawall, linux-m68k, openrisc, bp, luto, nios2-dev, kirill,
	gxt, linux-arm-kernel, chris, richard, linux-parisc, pantin,
	jcmvbkbc, linux-kernel, minchan, linux-alpha, lftan, akpm,
	linuxppc-dev

From: Joel Fernandes <joel@joelfernandes.org>
Date: Fri, 12 Oct 2018 05:50:46 -0700

> If its an issue, then how do transparent huge pages work on Sparc?  I don't
> see the huge page code (move_huge_pages) during mremap doing anything special
> for Sparc architecture when moving PMDs..

This is because all huge pages are larger than SHMLBA.  So no cache flushing
necessary.

> Also, do we not flush the caches from any path when we munmap
> address space?  We do call do_munmap on the old mapping from mremap
> after moving to the new one.

Sparc makes sure that shared mapping have consistent colors.  Therefore
all that's left are private mappings and those will be initialized by
block stores to clear the page out or similar.

Also, when creating new mappings, we flush the D-cache when necessary
in update_mmu_cache().

We also maintain a bit in the page struct to track when a page which
was potentially written to on one cpu ends up mapped into another
address space and flush as necessary.

The cache is write-through, which simplifies the preconditions we have
to maintain.

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 18:51   ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-12 18:51 UTC (permalink / raw)
  To: Joel Fernandes, kernel-janitors
  Cc: linux-kernel, kernel-team, Michal Hocko, Julia Lawall,
	Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Daniel Colascione, Dave Hansen,
	David S. Miller, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Hugh Dickins, Ingo Molnar, James E. J. Bottomley,
	Jeff Dike, Jonas Bonn, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	pantin, Lokesh Gidra, Max Filippov, Minchan Kim, nios2-dev,
	openrisc, Peter Zijlstra, Richard Weinberger, Rich Felker,
	Sam Creasey, sparclinux, Stafford Horne, Stefan Kristiansson,
	Thomas Gleixner, Tony Luck, Will Deacon,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato, Kirill A. Shutemov, Andrew Morton

> The changes were obtained by applying the following Coccinelle script.

There has been some clarification of its implementation details:
https://systeme.lip6.fr/pipermail/cocci/2018-October/005374.html

I have also taken another look at the following SmPL code.


> identifier fn =~
> "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";

I suggest adjusting the regular expression for this constraint
and in the subsequent SmPL rules.

"^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";

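A quick throwaway check in Python (using the re module rather than
Coccinelle's own regex engine, purely to illustrate the equivalence of the
two patterns; the pte_alloc_map entry is just a deliberately non-matching
identifier):

import re

OLD = r"^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$"
NEW = r"^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$"

names = ["pte_alloc", "pte_alloc_one", "pte_alloc_one_kernel",
         "__pte_alloc", "__pte_alloc_kernel", "pte_alloc_map"]

for name in names:
    # Both constraints should accept exactly the same identifiers.
    assert bool(re.match(OLD, name)) == bool(re.match(NEW, name)), name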

> (
> - T3 fn(T1 E1, T2 E2);
> + T3 fn(T1 E1);
> |
> - T3 fn(T1 E1, T2 E2, T4 E4);
> + T3 fn(T1 E1, T2 E2);
> )

I propose taking another SmPL disjunction into account here.

 T3 fn(T1 E1,
(
-      T2 E2
|      T2 E2,
-      T4 E4
)      );


> (
> - #define fn(a, b, c)@p e
> + #define fn(a, b) e
> |
> - #define fn(a, b)@p e
> + #define fn(a) e
> )

What do you think about omitting the metavariable “position p” here?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-12 19:42     ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-12 19:42 UTC (permalink / raw)
  To: SF Markus Elfring
  Cc: kernel-janitors, linux-kernel, kernel-team, Michal Hocko,
	Julia Lawall, Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Daniel Colascione, Dave Hansen,
	David S. Miller, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Hugh Dickins, Ingo Molnar, James E. J. Bottomley,
	Jeff Dike, Jonas Bonn, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	pantin, Lokesh Gidra, Max Filippov, Minchan Kim, nios2-dev,
	openrisc, Peter Zijlstra, Richard Weinberger, Rich Felker,
	Sam Creasey, sparclinux, Stafford Horne, Stefan Kristiansson,
	Thomas Gleixner, Tony Luck, Will Deacon,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato, Kirill A. Shutemov, Andrew Morton

On Fri, Oct 12, 2018 at 08:51:45PM +0200, SF Markus Elfring wrote:
> > The changes were obtained by applying the following Coccinelle script.
> 
> There has been some clarification of its implementation details:
> https://systeme.lip6.fr/pipermail/cocci/2018-October/005374.html
> 
> I have also taken another look at the following SmPL code.
> 
> 
> > identifier fn =~
> > "^(__pte_alloc|pte_alloc_one|pte_alloc|__pte_alloc_kernel|pte_alloc_one_kernel)$";
> 
> I suggest adjusting the regular expression for this constraint
> and in the subsequent SmPL rules.
> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";

Sure it looks more clever, but why? Ugh that's harder to read and confusing.

> > (
> > - T3 fn(T1 E1, T2 E2);
> > + T3 fn(T1 E1);
> > |
> > - T3 fn(T1 E1, T2 E2, T4 E4);
> > + T3 fn(T1 E1, T2 E2);
> > )
> 
> I propose taking another SmPL disjunction into account here.
> 
>  T3 fn(T1 E1,
> (
> -      T2 E2
> |      T2 E2,
> -      T4 E4
> )      );

Again this is confusing. It makes one think that maybe the second argument
can also be removed, and it takes careful reading to notice that the ");" follows.

> > (
> > - #define fn(a, b, c)@p e
> > + #define fn(a, b) e
> > |
> > - #define fn(a, b)@p e
> > + #define fn(a) e
> > )
> 
> What do you think about omitting the metavariable “position p” here?

Right, I don't need it in this case. But the script works either way.

I like to take more of a problem-solving approach that makes sense, rather than
aiming for perfection; after all, this is a useful script that we do not
need to check in once we are finished with it.

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 21:40             ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 21:40 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.

Well, the optimization is only suitable for architectures that have 3 or more
levels of page tables. Otherwise there is no [non-folded] pmd.

And in that case arch/um should already have set_pmd_at(); see
3_LEVEL_PGTABLES.

To port this to 2-level paging, it would have to be handled at the pgd level.
That complicates the code and would not bring much value.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 21:40             ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 21:40 UTC (permalink / raw)
  To: Anton Ivanov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan@kernel.org
> > > > > Cc: pantin@google.com
> > > > > Cc: hughd@google.com
> > > > > Cc: lokeshgidra@google.com
> > > > > Cc: dancol@google.com
> > > > > Cc: mhocko@kernel.org
> > > > > Cc: kirill@shutemov.name
> > > > > Cc: akpm@linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.

Well, the optimization is only suitable for architectures that have 3 or more
levels of page tables. Otherwise there is no [non-folded] pmd.

And in that case arch/um should already have set_pmd_at(); see
3_LEVEL_PGTABLES.

To port this to 2-level paging, it would have to be handled at the pgd level.
That complicates the code and would not bring much value.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 21:40             ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 21:40 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan at kernel.org
> > > > > Cc: pantin at google.com
> > > > > Cc: hughd at google.com
> > > > > Cc: lokeshgidra at google.com
> > > > > Cc: dancol at google.com
> > > > > Cc: mhocko at kernel.org
> > > > > Cc: kirill at shutemov.name
> > > > > Cc: akpm at linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.

Well, the optimization is only suitable for architectures that have 3 or more
levels of page tables. Otherwise there is no [non-folded] pmd.

And in that case arch/um should already have set_pmd_at(); see
3_LEVEL_PGTABLES.

To port this to 2-level paging, it would have to be handled at the pgd level.
That complicates the code and would not bring much value.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-12 21:40             ` Kirill A. Shutemov
  0 siblings, 0 replies; 317+ messages in thread
From: Kirill A. Shutemov @ 2018-10-12 21:40 UTC (permalink / raw)
  To: openrisc

On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
> 
> On 10/12/18 3:48 PM, Anton Ivanov wrote:
> > On 12/10/2018 15:37, Kirill A. Shutemov wrote:
> > > On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
> > > > On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
> > > > > Android needs to mremap large regions of memory during
> > > > > memory management
> > > > > related operations. The mremap system call can be really
> > > > > slow if THP is
> > > > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > > > pte at a time, and can be really slow across a large map.
> > > > > Turning on THP
> > > > > may not be a viable option, and is not for us. This patch
> > > > > speeds up the
> > > > > performance for non-THP system by copying at the PMD level
> > > > > when possible.
> > > > > 
> > > > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > > > completion times drops from 160-250 millesconds to 380-400
> > > > > microseconds.
> > > > > 
> > > > > Before:
> > > > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > > > 
> > > > > After:
> > > > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > > > 
> > > > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > > > tlb every time we do this optimization since I couldn't find a way to
> > > > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > > > doing so is not much compared the improvement, on both
> > > > > x86-64 and arm64.
> > > > > 
> > > > > Cc: minchan at kernel.org
> > > > > Cc: pantin at google.com
> > > > > Cc: hughd at google.com
> > > > > Cc: lokeshgidra at google.com
> > > > > Cc: dancol at google.com
> > > > > Cc: mhocko at kernel.org
> > > > > Cc: kirill at shutemov.name
> > > > > Cc: akpm at linux-foundation.org
> > > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > ---
> > > > >    mm/mremap.c | 62
> > > > > +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    1 file changed, 62 insertions(+)
> > > > > 
> > > > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > > > index 9e68a02a52b1..d82c485822ef 100644
> > > > > --- a/mm/mremap.c
> > > > > +++ b/mm/mremap.c
> > > > > @@ -191,6 +191,54 @@ static void move_ptes(struct
> > > > > vm_area_struct *vma, pmd_t *old_pmd,
> > > > >            drop_rmap_locks(vma);
> > > > >    }
> > > > > +static bool move_normal_pmd(struct vm_area_struct *vma,
> > > > > unsigned long old_addr,
> > > > > +          unsigned long new_addr, unsigned long old_end,
> > > > > +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > > > +{
> > > > > +    spinlock_t *old_ptl, *new_ptl;
> > > > > +    struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > > > +        || old_end - old_addr < PMD_SIZE)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * The destination pmd shouldn't be established, free_pgtables()
> > > > > +     * should have release it.
> > > > > +     */
> > > > > +    if (WARN_ON(!pmd_none(*new_pmd)))
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * We don't have to worry about the ordering of src and dst
> > > > > +     * ptlocks because exclusive mmap_sem prevents deadlock.
> > > > > +     */
> > > > > +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > > > +    if (old_ptl) {
> > > > > +        pmd_t pmd;
> > > > > +
> > > > > +        new_ptl = pmd_lockptr(mm, new_pmd);
> > > > > +        if (new_ptl != old_ptl)
> > > > > +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > > > +
> > > > > +        /* Clear the pmd */
> > > > > +        pmd = *old_pmd;
> > > > > +        pmd_clear(old_pmd);
> > > > > +
> > > > > +        VM_BUG_ON(!pmd_none(*new_pmd));
> > > > > +
> > > > > +        /* Set the new pmd */
> > > > > +        set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > > UML does not have set_pmd_at at all
> > > Every architecture does. :)
> > 
> > I tried to build it patching vs 4.19-rc before I made this statement and
> > ran into that.
> > 
> > Presently it does not.
> > 
> > https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
> > on the list.
> 
> Once this problem as well as the omissions in the include changes for UML in
> patch one have been fixed it appears to be working.
> 
> What it needs is attached.

Well, the optimization is only suitable for architectures that have 3 or more
levels of page tables. Otherwise there is no [non-folded] pmd.

And in that case arch/um should already have set_pmd_at(); see
3_LEVEL_PGTABLES.

To port this to 2-level paging, it would have to be handled at the pgd level.
That complicates the code and would not bring much value.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12 18:18         ` David Miller
                             ` (5 preceding siblings ...)
  (?)
@ 2018-10-13  1:35           ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:35 UTC (permalink / raw)
  To: David Miller
  Cc: kirill, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips

On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
> From: Joel Fernandes <joel@joelfernandes.org>
[...]
> > Also, do we not flush the caches from any path when we munmap
> > address space?  We do call do_munmap on the old mapping from mremap
> > after moving to the new one.
> 
> Sparc makes sure that shared mapping have consistent colors.  Therefore
> all that's left are private mappings and those will be initialized by
> block stores to clear the page out or similar.
> 
> Also, when creating new mappings, we flush the D-cache when necessary
> in update_mmu_cache().
> 
> We also maintain a bit in the page struct to track when a page which
> was potentially written to on one cpu ends up mapped into another
> address space and flush as necessary.
> 
> The cache is write-through, which simplifies the preconditions we have
> to maintain.

Makes sense, thanks. For the moment, I sent patches to enable this on arm64
and x86. We can enable it on sparc as well at a later time, since it sounds
like it could be a safe optimization to apply to that architecture too.
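
As a side note on what the per-architecture enabling amounts to: a sketch of
gating the PMD-level move behind an arch opt-in. The CONFIG_HAVE_MOVE_PMD
symbol below is hypothetical (used only for illustration), and the call-site
shape assumes the existing extent/need_flush handling in move_page_tables():

    #ifdef CONFIG_HAVE_MOVE_PMD    /* hypothetical per-arch opt-in symbol */
            if (extent == PMD_SIZE &&
                move_normal_pmd(vma, old_addr, new_addr, old_end,
                                old_pmd, new_pmd, &need_flush))
                    continue;
    #endif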

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:35           ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:35 UTC (permalink / raw)
  To: David Miller
  Cc: kirill, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, l

On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
> From: Joel Fernandes <joel@joelfernandes.org>
[...]
> > Also, do we not flush the caches from any path when we munmap
> > address space?  We do call do_munmap on the old mapping from mremap
> > after moving to the new one.
> 
> Sparc makes sure that shared mapping have consistent colors.  Therefore
> all that's left are private mappings and those will be initialized by
> block stores to clear the page out or similar.
> 
> Also, when creating new mappings, we flush the D-cache when necessary
> in update_mmu_cache().
> 
> We also maintain a bit in the page struct to track when a page which
> was potentially written to on one cpu ends up mapped into another
> address space and flush as necessary.
> 
> The cache is write-through, which simplifies the preconditions we have
> to maintain.

Makes sense, thanks. For the moment, I sent patches to enable this on arm64
and x86. We can enable it on sparc as well at a later time, since it sounds
like it could be a safe optimization to apply to that architecture too.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:35           ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:35 UTC (permalink / raw)
  To: David Miller
  Cc: kirill, linux-kernel, kernel-team, minchan, pantin, hughd,
	lokeshgidra, dancol, mhocko, akpm, aryabinin, luto, bp,
	catalin.marinas, chris, dave.hansen, elfring, fenghua.yu, geert,
	gxt, deller, mingo, jejb, jdike, jonas, Julia.Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	linux-xtensa, jcmvbkbc, nios2-dev, peterz, richard

On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
> From: Joel Fernandes <joel@joelfernandes.org>
[...]
> > Also, do we not flush the caches from any path when we munmap
> > address space?  We do call do_munmap on the old mapping from mremap
> > after moving to the new one.
> 
> Sparc makes sure that shared mapping have consistent colors.  Therefore
> all that's left are private mappings and those will be initialized by
> block stores to clear the page out or similar.
> 
> Also, when creating new mappings, we flush the D-cache when necessary
> in update_mmu_cache().
> 
> We also maintain a bit in the page struct to track when a page which
> was potentially written to on one cpu ends up mapped into another
> address space and flush as necessary.
> 
> The cache is write-through, which simplifies the preconditions we have
> to maintain.

Makes sense, thanks. For the moment, I sent patches to enable this on arm64
and x86. We can enable it on sparc as well at a later time, since it sounds
like it could be a safe optimization to apply to that architecture too.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:35           ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:35 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
> From: Joel Fernandes <joel@joelfernandes.org>
[...]
> > Also, do we not flush the caches from any path when we munmap
> > address space?  We do call do_munmap on the old mapping from mremap
> > after moving to the new one.
> 
> Sparc makes sure that shared mapping have consistent colors.  Therefore
> all that's left are private mappings and those will be initialized by
> block stores to clear the page out or similar.
> 
> Also, when creating new mappings, we flush the D-cache when necessary
> in update_mmu_cache().
> 
> We also maintain a bit in the page struct to track when a page which
> was potentially written to on one cpu ends up mapped into another
> address space and flush as necessary.
> 
> The cache is write-through, which simplifies the preconditions we have
> to maintain.

Makes sense, thanks. For the moment, I sent patches to enable this on arm64
and x86. We can enable it on sparc as well at a later time, since it sounds
like it could be a safe optimization to apply to that architecture too.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:35           ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:35 UTC (permalink / raw)
  To: David Miller
  Cc: linux-mips, linux-ia64, linux-sh, peterz, catalin.marinas,
	dave.hansen, mhocko, linux-mm, lokeshgidra, linux-riscv, elfring,
	jonas, linux-s390, dancol, linux-xtensa, linux-hexagon, deller,
	hughd, jejb, kasan-dev, kvmarm, mingo, geert, aryabinin,
	linux-snps-arc, kernel-team, fenghua.yu, jdike, linux-um,
	Julia.Lawall, linux-m68k, bp, luto, nios2-dev, kirill, gxt,
	chris, richard, linux-parisc, pantin, jcmvbkbc, linux-kernel,
	minchan, linux-alpha, lftan, akpm, linuxppc-dev

On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
> From: Joel Fernandes <joel@joelfernandes.org>
[...]
> > Also, do we not flush the caches from any path when we munmap
> > address space?  We do call do_munmap on the old mapping from mremap
> > after moving to the new one.
> 
> Sparc makes sure that shared mapping have consistent colors.  Therefore
> all that's left are private mappings and those will be initialized by
> block stores to clear the page out or similar.
> 
> Also, when creating new mappings, we flush the D-cache when necessary
> in update_mmu_cache().
> 
> We also maintain a bit in the page struct to track when a page which
> was potentially written to on one cpu ends up mapped into another
> address space and flush as necessary.
> 
> The cache is write-through, which simplifies the preconditions we have
> to maintain.

Makes sense, thanks. For the moment, I sent patches to enable this on arm64
and x86. We can enable it on sparc as well at a later time, since it sounds
like it could be a safe optimization to apply to that architecture too.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:35           ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:35 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
> From: Joel Fernandes <joel at joelfernandes.org>
[...]
> > Also, do we not flush the caches from any path when we munmap
> > address space?  We do call do_munmap on the old mapping from mremap
> > after moving to the new one.
> 
> Sparc makes sure that shared mapping have consistent colors.  Therefore
> all that's left are private mappings and those will be initialized by
> block stores to clear the page out or similar.
> 
> Also, when creating new mappings, we flush the D-cache when necessary
> in update_mmu_cache().
> 
> We also maintain a bit in the page struct to track when a page which
> was potentially written to on one cpu ends up mapped into another
> address space and flush as necessary.
> 
> The cache is write-through, which simplifies the preconditions we have
> to maintain.

Makes sense, thanks. For the moment, I sent patches to enable this on arm64
and x86. We can enable it on sparc as well at a later time, since it sounds
like it could be a safe optimization to apply to that architecture too.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-13  1:35           ` Joel Fernandes
                               ` (5 preceding siblings ...)
  (?)
@ 2018-10-13  1:39             ` Daniel Colascione
  -1 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  1:39 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, hughd, Lokesh Gidra, Michal Hocko, Andrew Morton,
	aryabinin, luto, bp, catalin.marinas, chris, dave.hansen,
	elfring, fenghua.yu, geert, gxt, deller, mingo, jejb, jdike,
	jonas, Julia.Lawall, kasan-dev, kvmarm, lftan, linux-alpha

Not 32-bit ARM?

On Fri, Oct 12, 2018 at 6:35 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
>> From: Joel Fernandes <joel@joelfernandes.org>
> [...]
>> > Also, do we not flush the caches from any path when we munmap
>> > address space?  We do call do_munmap on the old mapping from mremap
>> > after moving to the new one.
>>
>> Sparc makes sure that shared mapping have consistent colors.  Therefore
>> all that's left are private mappings and those will be initialized by
>> block stores to clear the page out or similar.
>>
>> Also, when creating new mappings, we flush the D-cache when necessary
>> in update_mmu_cache().
>>
>> We also maintain a bit in the page struct to track when a page which
>> was potentially written to on one cpu ends up mapped into another
>> address space and flush as necessary.
>>
>> The cache is write-through, which simplifies the preconditions we have
>> to maintain.
>
> Makes sense, thanks. For the moment I sent patches to enable this on arm64
> and x86. We can enable it on sparc as well at a later time as it sounds it
> could be a safe optimization to apply to that architecture as well.
>
> thanks,
>
>  - Joel
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:39             ` Daniel Colascione
  0 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  1:39 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, hughd, Lokesh Gidra, Michal Hocko, Andrew Morton,
	aryabinin, luto, bp, catalin.marinas, chris, dave.hansen,
	elfring, fenghua.yu, geert, gxt, deller, mingo, jejb, jdike,
	jonas, Julia.Lawall, kasan-dev, kvmarm, lftan, linux-alpha,
	linux

Not 32-bit ARM?

On Fri, Oct 12, 2018 at 6:35 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
>> From: Joel Fernandes <joel@joelfernandes.org>
> [...]
>> > Also, do we not flush the caches from any path when we munmap
>> > address space?  We do call do_munmap on the old mapping from mremap
>> > after moving to the new one.
>>
>> Sparc makes sure that shared mapping have consistent colors.  Therefore
>> all that's left are private mappings and those will be initialized by
>> block stores to clear the page out or similar.
>>
>> Also, when creating new mappings, we flush the D-cache when necessary
>> in update_mmu_cache().
>>
>> We also maintain a bit in the page struct to track when a page which
>> was potentially written to on one cpu ends up mapped into another
>> address space and flush as necessary.
>>
>> The cache is write-through, which simplifies the preconditions we have
>> to maintain.
>
> Makes sense, thanks. For the moment I sent patches to enable this on arm64
> and x86. We can enable it on sparc as well at a later time as it sounds it
> could be a safe optimization to apply to that architecture as well.
>
> thanks,
>
>  - Joel
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  1:39             ` Daniel Colascione
  0 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  1:39 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, hughd, Lokesh Gidra, Michal Hocko, Andrew Morton,
	aryabinin, luto, bp, catalin.marinas, chris, dave.hansen,
	elfring, fenghua.yu, geert, gxt, deller, mingo, jejb, jdike,
	jonas, Julia.Lawall, kasan-dev, kvmarm, lftan, linux-alpha,
	linux-hexagon, linux-ia64, linux-m68k, linux-mips, linux-mm,
	linux-parisc, linuxppc-dev, linux-riscv, linux-s390, linux-sh,
	linux-snps-arc, linux-um, linux-xtensa, jcmvbkbc, nios2-dev,
	Peter Zijlstra, richard

Not 32-bit ARM?

On Fri, Oct 12, 2018 at 6:35 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 11:18:36AM -0700, David Miller wrote:
>> From: Joel Fernandes <joel@joelfernandes.org>
> [...]
>> > Also, do we not flush the caches from any path when we munmap
>> > address space?  We do call do_munmap on the old mapping from mremap
>> > after moving to the new one.
>>
>> Sparc makes sure that shared mapping have consistent colors.  Therefore
>> all that's left are private mappings and those will be initialized by
>> block stores to clear the page out or similar.
>>
>> Also, when creating new mappings, we flush the D-cache when necessary
>> in update_mmu_cache().
>>
>> We also maintain a bit in the page struct to track when a page which
>> was potentially written to on one cpu ends up mapped into another
>> address space and flush as necessary.
>>
>> The cache is write-through, which simplifies the preconditions we have
>> to maintain.
>
> Makes sense, thanks. For the moment I sent patches to enable this on arm64
> and x86. We can enable it on sparc as well at a later time as it sounds it
> could be a safe optimization to apply to that architecture as well.
>
> thanks,
>
>  - Joel
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-13  1:39             ` Daniel Colascione
                                 ` (5 preceding siblings ...)
  (?)
@ 2018-10-13  1:44               ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  1:44 UTC (permalink / raw)
  To: Daniel Colascione
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, hughd, Lokesh Gidra, Michal Hocko, Andrew Morton,
	aryabinin, luto, bp, catalin.marinas, chris, dave.hansen,
	elfring, fenghua.yu, geert, gxt, deller, mingo, jejb, jdike,
	jonas, Julia.Lawall, kasan-dev, kvmarm, lftan, linux-alpha

On Fri, Oct 12, 2018 at 06:39:45PM -0700, Daniel Colascione wrote:
> Not 32-bit ARM?

Well, I didn't want to enable every possible architecture in a single
go. Certainly arm32 can be a follow-on enablement, as can other
architectures. The point of this series is to upstream this feature and
enable a hand-picked few architectures as a first step.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-13  1:44               ` Joel Fernandes
                                   ` (5 preceding siblings ...)
  (?)
@ 2018-10-13  1:54                 ` Daniel Colascione
  -1 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  1:54 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, hughd, Lokesh Gidra, Michal Hocko, Andrew Morton,
	aryabinin, luto, bp, catalin.marinas, chris, dave.hansen,
	elfring, fenghua.yu, geert, gxt, deller, mingo, jejb, jdike,
	jonas, Julia.Lawall, kasan-dev, kvmarm, lftan, linux-alpha

I wonder whether it makes sense to expose to userspace somehow whether
mremap is "fast" for a particular architecture. If a feature relies on
fast mremap, it might be better for some userland component to disable
that feature entirely rather than blindly use mremap and end up
performing very poorly. If we're disabling fast mremap when THP is
enabled, the userland component can't just rely on an architecture
switch and some kind of runtime feature detection becomes even more
important.
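
As a rough illustration of what such runtime detection could look like in
userland (purely a sketch, not part of this series): force mremap() to move
the page tables of a large, fully populated anonymous mapping and time it.
The 1 GB size and the 10 ms threshold below are arbitrary assumptions; a
kernel that moves entries at the PMD level should come in far below one
that copies PTEs one by one.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>
#include <time.h>

int main(void)
{
	size_t len = 1UL << 30;	/* 1 GB */
	char *src, *dst;
	struct timespec t0, t1;
	uint64_t ns;
	size_t i;

	src = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	/* Reserve a destination so MREMAP_FIXED forces an actual move. */
	dst = mmap(NULL, len, PROT_NONE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED || dst == MAP_FAILED)
		return 1;

	/* Touch every page so there are page tables to move. */
	for (i = 0; i < len; i += 4096)
		src[i] = 1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	if (mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst)
	    == MAP_FAILED)
		return 1;
	clock_gettime(CLOCK_MONOTONIC, &t1);

	ns = (t1.tv_sec - t0.tv_sec) * 1000000000ULL +
	     (t1.tv_nsec - t0.tv_nsec);
	printf("mremap of 1GB took %llu ns\n", (unsigned long long)ns);
	printf("fast mremap likely: %s\n",
	       ns < 10ULL * 1000 * 1000 ? "yes" : "no");
	return 0;
}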

On Fri, Oct 12, 2018 at 6:44 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 06:39:45PM -0700, Daniel Colascione wrote:
>> Not 32-bit ARM?
>
> Well, I didn't want to enable every possible architecture in a single
> go. Certainly arm32 can be a follow-on enablement, as can other
> architectures. The point of this series is to upstream this feature and
> enable a hand-picked few architectures as a first step.
>
> thanks,
>
>  - Joel
>

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-13  1:54                 ` Daniel Colascione
                                     ` (5 preceding siblings ...)
  (?)
@ 2018-10-13  2:10                   ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13  2:10 UTC (permalink / raw)
  To: Daniel Colascione
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, hughd, Lokesh Gidra, Michal Hocko, Andrew Morton,
	aryabinin, luto, bp, catalin.marinas, chris, dave.hansen,
	elfring, fenghua.yu, geert, gxt, deller, mingo, jejb, jdike,
	jonas, Julia.Lawall, kasan-dev, kvmarm, lftan, linux-alpha

On Fri, Oct 12, 2018 at 06:54:33PM -0700, Daniel Colascione wrote:
> I wonder whether it makes sense to expose to userspace somehow whether
> mremap is "fast" for a particular architecture. If a feature relies on
> fast mremap, it might be better for some userland component to disable
> that feature entirely rather than blindly use mremap and end up
> performing very poorly. If we're disabling fast mremap when THP is
> enabled, the userland component can't just rely on an architecture
> switch and some kind of runtime feature detection becomes even more
> important.

I hate to point out that it's forbidden to top-post on LKML :-)
https://kernelnewbies.org/mailinglistguidelines
So don't do that, Mr. Dan! :D

But anyway, I think this runtime detection thing is not needed. THP is
actually expected to be as fast as this anyway, so if that's available then
we should already be as fast. This is for non-THP, where THP cannot be
enabled and there is still room for some improvement. Most/all architectures
will be just fine with this. This flag is more of a safety-net type of thing:
if in the future there are one or two weird architectures that don't play
well, they can turn it off at the architecture level by not selecting the
flag. See my latest patches for the per-architecture compile-time controls.
Ideally we'd like to blanket turn it on for all architectures, but this is
just playing it extra safe, as Kirill and I were discussing on other threads.

thanks!

- Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-13  2:10                   ` Joel Fernandes
                                       ` (4 preceding siblings ...)
  (?)
@ 2018-10-13  2:25                     ` Daniel Colascione
  -1 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  2:25 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, Hugh Dickins, Lokesh Gidra, Michal Hocko,
	Andrew Morton, aryabinin, luto, bp, catalin.marinas,
	Chris Zankel, dave.hansen, elfring, fenghua.yu, geert, gxt,
	deller, mingo, jejb, jdike, Jonas Bonn, Julia Lawall, kasan-dev,
	kvmarm

On Fri, Oct 12, 2018 at 7:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 06:54:33PM -0700, Daniel Colascione wrote:
>> I wonder whether it makes sense to expose to userspace somehow whether
>> mremap is "fast" for a particular architecture. If a feature relies on
>> fast mremap, it might be better for some userland component to disable
>> that feature entirely rather than blindly use mremap and end up
>> performing very poorly. If we're disabling fast mremap when THP is
>> enabled, the userland component can't just rely on an architecture
>> switch and some kind of runtime feature detection becomes even more
>> important.
>
> I hate to point out that it's forbidden to top-post on LKML :-)
> https://kernelnewbies.org/mailinglistguidelines
> So don't do that, Mr. Dan! :D

Guilty as charged. I really should switch back to Gnus. :-)

> But anyway, I think this runtime detection thing is not needed. THP is
> actually expected to be as fast as this anyway, so if that's available then
> we should already be as fast.

Ah, I think the commit message is confusing. (Or else I'm misreading
the patch now.) It's not quite that we're disabling the feature when
THP is enabled anywhere, but rather that we use the move_huge_pmd path
for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
that's the case, the commit message shouldn't say "Incase THP is
enabled, the optimization is skipped". Even if THP is enabled on a
system generally, we might use the new PMD-moving code for mapping
types that don't support THP-ization, right?
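
For what it's worth, here is my reading of the intended dispatch, sketched
as a fragment (this is an illustration of the idea, not a quote of the
patch; move_normal_pmd() and CONFIG_HAVE_MOVE_PMD are names assumed from
the follow-up per-architecture patches, and the argument lists are elided):

	/* For each PMD-sized step in move_page_tables(): */
	if (pmd_trans_huge(*old_pmd)) {
		/* Existing THP path: move or split the huge PMD. */
		if (move_huge_pmd(/* vma, old/new addresses, old/new pmd */))
			continue;
		split_huge_pmd(/* vma, old_pmd, old_addr */);
	}
#ifdef CONFIG_HAVE_MOVE_PMD	/* assumed per-arch opt-in */
	else if (extent == PMD_SIZE) {
		/*
		 * New path: the range covers a whole non-huge PMD, so move
		 * the PMD entry (the pointer to the PTE page) instead of
		 * copying up to 512 PTEs individually.
		 */
		if (move_normal_pmd(/* vma, old/new addresses, old/new pmd */))
			continue;
	}
#endif
	/* Fallback: copy the PTEs one by one, as before. */
	move_ptes(/* vma, old/new pmd, old/new addresses, ... */);

So even on a kernel with THP enabled, mappings that end up with ordinary
(non-huge) PMDs would still take the new whole-PMD move, subject to the
per-architecture flag.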

> This is for non-THP, where THP cannot be enabled and there is still room
> for some improvement. Most/all architectures will be just fine with this.
> This flag is more of a safety-net type of thing: if in the future there
> are one or two weird architectures that don't play well, they can turn it
> off at the architecture level by not selecting the flag. See my latest
> patches for the per-architecture compile-time controls. Ideally we'd like
> to blanket turn it on for all architectures, but this is just playing it
> extra safe, as Kirill and I were discussing on other threads.

Sure. I'm just pointing out that the 500x performance difference turns
the operation into a qualitatively different feature, so if we expect
to actually ship a mainstream architecture without support for this
thing, we should make it explicit. If we're not, we shouldn't.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  2:25                     ` Daniel Colascione
  0 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  2:25 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, linux-ia64, linux-sh, Peter Zijlstra,
	catalin.marinas, dave.hansen, Michal Hocko, linux-mm,
	Lokesh Gidra, linux-riscv, elfring, Jonas Bonn, linux-s390,
	linux-xtensa, linux-hexagon, deller, Hugh Dickins, jejb,
	kasan-dev, kvmarm, mingo, geert, aryabinin, linux-snps-arc,
	kernel-team, fenghua.yu, jdike, linux-um, Julia Lawall,
	linux-m68k, bp, luto, nios2-dev, kirill, gxt, Chris Zankel,
	richard, linux-parisc, Ramon Pantin, Max Filippov, linux-kernel,
	Minchan Kim, linux-alpha, lftan, Andrew Morton, linuxppc-dev,
	David Miller

On Fri, Oct 12, 2018 at 7:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 06:54:33PM -0700, Daniel Colascione wrote:
>> I wonder whether it makes sense to expose to userspace somehow whether
>> mremap is "fast" for a particular architecture. If a feature relies on
>> fast mremap, it might be better for some userland component to disable
>> that feature entirely rather than blindly use mremap and end up
>> performing very poorly. If we're disabling fast mremap when THP is
>> enabled, the userland component can't just rely on an architecture
>> switch and some kind of runtime feature detection becomes even more
>> important.
>
> I hate to point out that its forbidden to top post on LKML :-)
> https://kernelnewbies.org/mailinglistguidelines
> So don't that Mr. Dan! :D

Guilty as charged. I really should switch back to Gnus. :-)

> But anyway, I think this runtime detection thing is not needed. THP is
> actually expected to be as fast as this anyway, so if that's available then
> we should already be as fast.

Ah, I think the commit message is confusing. (Or else I'm misreading
the patch now.) It's not quite that we're disabling the feature when
THP is enabled anywhere, but rather that we use the move_huge_pmd path
for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
that's the case, the commit message shouldn't say "Incase THP is
enabled, the optimization is skipped". Even if THP is enabled on a
system generally, we might use the new PMD-moving code for mapping
types that don't support THP-ization, right?

> This is for non-THP where THP cannot be enabled
> and there is still room for some improvement. Most/all architectures will be
> just fine with this. This flag is more of a safety-net type of thing where in
> the future if there is this one or two weird architectures that don't play
> well, then they can turn it off at the architecture level by not selecting
> the flag. See my latest patches for the per-architecture compile-time
> controls. Ideally we'd like to blanket turn it on on all, but this is just
> playing it extra safe as Kirill and me were discussing on other threads.

Sure. I'm just pointing out that the 500x performance different turns
the operation into a qualitatively different feature, so if we expect
to actually ship a mainstream architecture without support for this
thing, we should make it explicit. If we're not, we shouldn't.

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  2:25                     ` Daniel Colascione
  0 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  2:25 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, linux-ia64, linux-sh, Peter Zijlstra,
	catalin.marinas, dave.hansen, Michal Hocko, linux-mm,
	Lokesh Gidra, linux-riscv, elfring, Jonas Bonn, linux-s390,
	linux-xtensa, linux-hexagon, deller, Hugh Dickins, jejb,
	kasan-dev, kvmarm, mingo, geert, aryabinin, linux-snps-arc,
	kernel-team, fenghua.yu, jdike, linux-um, Julia Lawall,
	linux-m68k, bp, luto, nios2-dev, kirill, gxt, Chris Zankel,
	richard, linux-parisc, Ramon Pantin, Max Filippov, linux-kernel,
	Minchan Kim, linux-alpha, lftan, Andrew Morton, linuxppc-dev,
	David Miller

On Fri, Oct 12, 2018 at 7:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018 at 06:54:33PM -0700, Daniel Colascione wrote:
>> I wonder whether it makes sense to expose to userspace somehow whether
>> mremap is "fast" for a particular architecture. If a feature relies on
>> fast mremap, it might be better for some userland component to disable
>> that feature entirely rather than blindly use mremap and end up
>> performing very poorly. If we're disabling fast mremap when THP is
>> enabled, the userland component can't just rely on an architecture
>> switch and some kind of runtime feature detection becomes even more
>> important.
>
> I hate to point out that its forbidden to top post on LKML :-)
> https://kernelnewbies.org/mailinglistguidelines
> So don't that Mr. Dan! :D

Guilty as charged. I really should switch back to Gnus. :-)

> But anyway, I think this runtime detection thing is not needed. THP is
> actually expected to be as fast as this anyway, so if that's available then
> we should already be as fast.

Ah, I think the commit message is confusing. (Or else I'm misreading
the patch now.) It's not quite that we're disabling the feature when
THP is enabled anywhere, but rather that we use the move_huge_pmd path
for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
that's the case, the commit message shouldn't say "Incase THP is
enabled, the optimization is skipped". Even if THP is enabled on a
system generally, we might use the new PMD-moving code for mapping
types that don't support THP-ization, right?

> This is for non-THP where THP cannot be enabled
> and there is still room for some improvement. Most/all architectures will be
> just fine with this. This flag is more of a safety-net type of thing where in
> the future if there is this one or two weird architectures that don't play
> well, then they can turn it off at the architecture level by not selecting
> the flag. See my latest patches for the per-architecture compile-time
> controls. Ideally we'd like to blanket turn it on on all, but this is just
> playing it extra safe as Kirill and me were discussing on other threads.

Sure. I'm just pointing out that the 500x performance different turns
the operation into a qualitatively different feature, so if we expect
to actually ship a mainstream architecture without support for this
thing, we should make it explicit. If we're not, we shouldn't.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  2:25                     ` Daniel Colascione
  0 siblings, 0 replies; 317+ messages in thread
From: Daniel Colascione @ 2018-10-13  2:25 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018@7:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Fri, Oct 12, 2018@06:54:33PM -0700, Daniel Colascione wrote:
>> I wonder whether it makes sense to expose to userspace somehow whether
>> mremap is "fast" for a particular architecture. If a feature relies on
>> fast mremap, it might be better for some userland component to disable
>> that feature entirely rather than blindly use mremap and end up
>> performing very poorly. If we're disabling fast mremap when THP is
>> enabled, the userland component can't just rely on an architecture
>> switch and some kind of runtime feature detection becomes even more
>> important.
>
> I hate to point out that its forbidden to top post on LKML :-)
> https://kernelnewbies.org/mailinglistguidelines
> So don't that Mr. Dan! :D

Guilty as charged. I really should switch back to Gnus. :-)

> But anyway, I think this runtime detection thing is not needed. THP is
> actually expected to be as fast as this anyway, so if that's available then
> we should already be as fast.

Ah, I think the commit message is confusing. (Or else I'm misreading
the patch now.) It's not quite that we're disabling the feature when
THP is enabled anywhere, but rather that we use the move_huge_pmd path
for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
that's the case, the commit message shouldn't say "Incase THP is
enabled, the optimization is skipped". Even if THP is enabled on a
system generally, we might use the new PMD-moving code for mapping
types that don't support THP-ization, right?

> This is for non-THP where THP cannot be enabled
> and there is still room for some improvement. Most/all architectures will be
> just fine with this. This flag is more of a safety-net type of thing where in
> the future if there is this one or two weird architectures that don't play
> well, then they can turn it off at the architecture level by not selecting
> the flag. See my latest patches for the per-architecture compile-time
> controls. Ideally we'd like to blanket turn it on on all, but this is just
> playing it extra safe as Kirill and me were discussing on other threads.

Sure. I'm just pointing out that the 500x performance different turns
the operation into a qualitatively different feature, so if we expect
to actually ship a mainstream architecture without support for this
thing, we should make it explicit. If we're not, we shouldn't.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optization is only suitable for arch that has 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.
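
(Anton's attachment is not reproduced in this archive. For what it's
worth, on an architecture like UML where page tables are managed purely
in software, the missing set_pmd_at() would presumably be little more
than a plain store of the entry -- a guess at its shape, not the actual
attachment:)

    static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                                  pmd_t *pmdp, pmd_t pmd)
    {
            /* No hardware MMU to order against here; just install the
             * entry.  UML propagates page-table changes to the host
             * mappings separately, at flush time. */
            *pmdp = pmd;
    }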


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optization is only suitable for arch that has 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: linux-riscv

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan at kernel.org
>>>>>> Cc: pantin at google.com
>>>>>> Cc: hughd at google.com
>>>>>> Cc: lokeshgidra at google.com
>>>>>> Cc: dancol at google.com
>>>>>> Cc: mhocko at kernel.org
>>>>>> Cc: kirill at shutemov.name
>>>>>> Cc: akpm at linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>  ?? mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  ?? 1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>  ?????????? drop_rmap_locks(vma);
>>>>>>  ?? }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +????????? unsigned long new_addr, unsigned long old_end,
>>>>>> +????????? pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +??? spinlock_t *old_ptl, *new_ptl;
>>>>>> +??? struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +??? if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +??????? || old_end - old_addr < PMD_SIZE)
>>>>>> +??????? return false;
>>>>>> +
>>>>>> +??? /*
>>>>>> +???? * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +???? * should have release it.
>>>>>> +???? */
>>>>>> +??? if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +??????? return false;
>>>>>> +
>>>>>> +??? /*
>>>>>> +???? * We don't have to worry about the ordering of src and dst
>>>>>> +???? * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +???? */
>>>>>> +??? old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +??? if (old_ptl) {
>>>>>> +??????? pmd_t pmd;
>>>>>> +
>>>>>> +??????? new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +??????? if (new_ptl != old_ptl)
>>>>>> +??????????? spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +??????? /* Clear the pmd */
>>>>>> +??????? pmd = *old_pmd;
>>>>>> +??????? pmd_clear(old_pmd);
>>>>>> +
>>>>>> +??????? VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +??????? /* Set the new pmd */
>>>>>> +??????? set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optization is only suitable for arch that has 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optization is only suitable for arch that has 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Joel Fernandes (Google),
	linux-kernel, linux-mips, Rich Felker, linux-ia64, linux-sh,
	Peter Zijlstra, Catalin Marinas, Dave Hansen, Will Deacon,
	mhocko, linux-mm, lokeshgidra, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, sparclinux, linux-xtensa,
	linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	Richard Weinberger, linux-parisc, pantin, Max Filippov, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>  A A  mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  A A  1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>  A A A A A A A A A A  drop_rmap_locks(vma);
>>>>>>  A A  }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +A A A A A A A A A  unsigned long new_addr, unsigned long old_end,
>>>>>> +A A A A A A A A A  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +A A A  spinlock_t *old_ptl, *new_ptl;
>>>>>> +A A A  struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +A A A  if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +A A A A A A A  || old_end - old_addr < PMD_SIZE)
>>>>>> +A A A A A A A  return false;
>>>>>> +
>>>>>> +A A A  /*
>>>>>> +A A A A  * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +A A A A  * should have release it.
>>>>>> +A A A A  */
>>>>>> +A A A  if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +A A A A A A A  return false;
>>>>>> +
>>>>>> +A A A  /*
>>>>>> +A A A A  * We don't have to worry about the ordering of src and dst
>>>>>> +A A A A  * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +A A A A  */
>>>>>> +A A A  old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +A A A  if (old_ptl) {
>>>>>> +A A A A A A A  pmd_t pmd;
>>>>>> +
>>>>>> +A A A A A A A  new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +A A A A A A A  if (new_ptl != old_ptl)
>>>>>> +A A A A A A A A A A A  spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +A A A A A A A  /* Clear the pmd */
>>>>>> +A A A A A A A  pmd = *old_pmd;
>>>>>> +A A A A A A A  pmd_clear(old_pmd);
>>>>>> +
>>>>>> +A A A A A A A  VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +A A A A A A A  /* Set the new pmd */
>>>>>> +A A A A A A A  set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optization is only suitable for arch that has 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, hughd,
	James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, linux-xtensa, Jeff Dike, linux-alpha, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Stafford Horne,
	Guan Xuetao, linux-arm-kernel, Chris Zankel, Tony Luck,
	linux-parisc, pantin, linux-kernel, Fenghua Yu, minchan,
	Thomas Gleixner, Richard Weinberger, sparclinux, akpm,
	linuxppc-dev, David S. Miller, Ley Foon Tan

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan@kernel.org
>>>>>> Cc: pantin@google.com
>>>>>> Cc: hughd@google.com
>>>>>> Cc: lokeshgidra@google.com
>>>>>> Cc: dancol@google.com
>>>>>> Cc: mhocko@kernel.org
>>>>>> Cc: kirill@shutemov.name
>>>>>> Cc: akpm@linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optization is only suitable for arch that has 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: linux-snps-arc

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018@05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018@03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion times drops from 160-250 millesconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan at kernel.org
>>>>>> Cc: pantin at google.com
>>>>>> Cc: hughd at google.com
>>>>>> Cc: lokeshgidra at google.com
>>>>>> Cc: dancol at google.com
>>>>>> Cc: mhocko at kernel.org
>>>>>> Cc: kirill at shutemov.name
>>>>>> Cc: akpm at linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel at joelfernandes.org>
>>>>>> ---
>>>>>>  ?? mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  ?? 1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optimization is only suitable for arches that have 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.
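
For reference, set_pmd_at() is a thin wrapper around set_pmd() on most
architectures, so the missing UML helper is small; a minimal sketch of
what it would take (assuming UML's set_pmd() behaves like everyone
else's -- the actual attachment referenced above may differ):

    /*
     * Hypothetical arch/um helper, mirroring what other architectures
     * provide: set_pmd_at() ignores mm/addr and simply installs the
     * pmd entry. A sketch only, not the attachment from this thread.
     */
    static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                                  pmd_t *pmdp, pmd_t pmd)
    {
            set_pmd(pmdp, pmd);
    }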

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13  6:10               ` Anton Ivanov
  0 siblings, 0 replies; 317+ messages in thread
From: Anton Ivanov @ 2018-10-13  6:10 UTC (permalink / raw)
  To: openrisc

On 12/10/2018 22:40, Kirill A. Shutemov wrote:
> On Fri, Oct 12, 2018 at 05:42:24PM +0100, Anton Ivanov wrote:
>> On 10/12/18 3:48 PM, Anton Ivanov wrote:
>>> On 12/10/2018 15:37, Kirill A. Shutemov wrote:
>>>> On Fri, Oct 12, 2018 at 03:09:49PM +0100, Anton Ivanov wrote:
>>>>> On 10/12/18 2:37 AM, Joel Fernandes (Google) wrote:
>>>>>> Android needs to mremap large regions of memory during
>>>>>> memory management
>>>>>> related operations. The mremap system call can be really
>>>>>> slow if THP is
>>>>>> not enabled. The bottleneck is move_page_tables, which is copying each
>>>>>> pte at a time, and can be really slow across a large map.
>>>>>> Turning on THP
>>>>>> may not be a viable option, and is not for us. This patch
>>>>>> speeds up the
>>>>>> performance for non-THP system by copying at the PMD level
>>>>>> when possible.
>>>>>>
>>>>>> The speed up is three orders of magnitude. On a 1GB mremap, the mremap
>>>>>> completion time drops from 160-250 milliseconds to 380-400
>>>>>> microseconds.
>>>>>>
>>>>>> Before:
>>>>>> Total mremap time for 1GB data: 242321014 nanoseconds.
>>>>>> Total mremap time for 1GB data: 196842467 nanoseconds.
>>>>>> Total mremap time for 1GB data: 167051162 nanoseconds.
>>>>>>
>>>>>> After:
>>>>>> Total mremap time for 1GB data: 385781 nanoseconds.
>>>>>> Total mremap time for 1GB data: 388959 nanoseconds.
>>>>>> Total mremap time for 1GB data: 402813 nanoseconds.
>>>>>>
>>>>>> Incase THP is enabled, the optimization is skipped. I also flush the
>>>>>> tlb every time we do this optimization since I couldn't find a way to
>>>>>> determine if the low-level PTEs are dirty. It is seen that the cost of
>>>>>> doing so is not much compared the improvement, on both
>>>>>> x86-64 and arm64.
>>>>>>
>>>>>> Cc: minchan at kernel.org
>>>>>> Cc: pantin at google.com
>>>>>> Cc: hughd at google.com
>>>>>> Cc: lokeshgidra at google.com
>>>>>> Cc: dancol at google.com
>>>>>> Cc: mhocko at kernel.org
>>>>>> Cc: kirill at shutemov.name
>>>>>> Cc: akpm at linux-foundation.org
>>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>>> ---
>>>>>>     mm/mremap.c | 62
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     1 file changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/mm/mremap.c b/mm/mremap.c
>>>>>> index 9e68a02a52b1..d82c485822ef 100644
>>>>>> --- a/mm/mremap.c
>>>>>> +++ b/mm/mremap.c
>>>>>> @@ -191,6 +191,54 @@ static void move_ptes(struct
>>>>>> vm_area_struct *vma, pmd_t *old_pmd,
>>>>>>             drop_rmap_locks(vma);
>>>>>>     }
>>>>>> +static bool move_normal_pmd(struct vm_area_struct *vma,
>>>>>> unsigned long old_addr,
>>>>>> +          unsigned long new_addr, unsigned long old_end,
>>>>>> +          pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
>>>>>> +{
>>>>>> +    spinlock_t *old_ptl, *new_ptl;
>>>>>> +    struct mm_struct *mm = vma->vm_mm;
>>>>>> +
>>>>>> +    if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
>>>>>> +        || old_end - old_addr < PMD_SIZE)
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The destination pmd shouldn't be established, free_pgtables()
>>>>>> +     * should have release it.
>>>>>> +     */
>>>>>> +    if (WARN_ON(!pmd_none(*new_pmd)))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * We don't have to worry about the ordering of src and dst
>>>>>> +     * ptlocks because exclusive mmap_sem prevents deadlock.
>>>>>> +     */
>>>>>> +    old_ptl = pmd_lock(vma->vm_mm, old_pmd);
>>>>>> +    if (old_ptl) {
>>>>>> +        pmd_t pmd;
>>>>>> +
>>>>>> +        new_ptl = pmd_lockptr(mm, new_pmd);
>>>>>> +        if (new_ptl != old_ptl)
>>>>>> +            spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
>>>>>> +
>>>>>> +        /* Clear the pmd */
>>>>>> +        pmd = *old_pmd;
>>>>>> +        pmd_clear(old_pmd);
>>>>>> +
>>>>>> +        VM_BUG_ON(!pmd_none(*new_pmd));
>>>>>> +
>>>>>> +        /* Set the new pmd */
>>>>>> +        set_pmd_at(mm, new_addr, new_pmd, pmd);
>>>>> UML does not have set_pmd_at at all
>>>> Every architecture does. :)
>>> I tried to build it patching vs 4.19-rc before I made this statement and
>>> ran into that.
>>>
>>> Presently it does not.
>>>
>>> https://elixir.bootlin.com/linux/v4.19-rc7/ident/set_pmd_at - UML is not
>>> on the list.
>> Once this problem as well as the omissions in the include changes for UML in
>> patch one have been fixed it appears to be working.
>>
>> What it needs is attached.
> Well, the optimization is only suitable for arches that have 3 or more levels
> of page tables. Otherwise it will not have [non-folded] pmd.
>
> And in this case arch/um already should have set_pmd_at(), see
> 3_LEVEL_PGTABLES.
>
> To port on 2-level paging, it has to be handled on pgd level. It
> complicates the code and will not bring much value.
>
UML has 3 level page tables on 64 bit.

A.
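
The two-level case Kirill describes is easy to see in the generic pmd
folding: with no real pmd level, the "pmd" is just the top-level entry
viewed through another type, and PMD_SIZE spans a whole pgd slot, so
there is no separate page-table page for move_normal_pmd() to relink.
Abridged from the generic include/asm-generic/pgtable-nopmd.h folding
(from memory, so check the tree for the exact definitions):

    /* With the pmd folded, pmd_t just wraps the upper-level entry ... */
    typedef struct { pud_t pud; } pmd_t;

    #define PMD_SHIFT       PUD_SHIFT
    #define PMD_SIZE        (1UL << PMD_SHIFT)
    #define PMD_MASK        (~(PMD_SIZE - 1))

    /* ... and pmd_offset() only reinterprets it, so "moving a pmd"
     * on such a configuration would really mean rewriting the pgd. */
    static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
    {
            return (pmd_t *)pud;
    }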


^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
  2018-10-12 19:42     ` Joel Fernandes
                         ` (6 preceding siblings ...)
  (?)
@ 2018-10-13  9:22       ` SF Markus Elfring
  -1 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, kernel-janitors, Michal Hocko,
	linux-mm, Lokesh Gidra, sparclinux, linux-riscv, kvmarm,
	Jonas Bonn, linux-s390, Daniel Colascione, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Hugh Dickins, James E. J. Bottomley, kasan-dev, Ingo

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, kernel-janitors, Michal Hocko,
	linux-mm, Lokesh Gidra, sparclinux, linux-riscv, kvmarm,
	Jonas Bonn, linux-s390, Daniel Colascione, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Hugh Dickins, James E. J. Bottomley, kasan-dev, Ingo

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc
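
The "identifier list as a constraint variant" mentioned above would
replace the regular expression with a plain enumeration of the five
names it matches; assuming the linked grammar page still reads the same
way, the metavariable declaration would look roughly like this:

    identifier fn = {pte_alloc, pte_alloc_one, pte_alloc_one_kernel,
                     __pte_alloc, __pte_alloc_kernel};

Coccinelle then matches fn against exactly those names, with no regular
expression engine involved.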

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: kernel-janitors, linux-kernel, kernel-team, Michal Hocko,
	Julia Lawall, Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Daniel Colascione, Dave Hansen,
	David S. Miller, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Hugh Dickins, Ingo Molnar, James E. J. Bottomley,
	Jeff Dike, Jonas Bonn, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	pantin, Lokesh Gidra, Max Filippov, Minchan Kim, nios2-dev,
	openrisc, Peter Zijlstra, Richard Weinberger, Rich Felker,
	Sam Creasey, sparclinux, Stafford Horne, Stefan Kristiansson,
	Thomas Gleixner, Tony Luck, Will Deacon,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato, Kirill A. Shutemov, Andrew Morton

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: linux-riscv

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, kernel-janitors, Michal Hocko,
	linux-mm, Lokesh Gidra, sparclinux, linux-riscv, kvmarm,
	Jonas Bonn, linux-s390, Daniel Colascione, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Hugh Dickins, James E. J. Bottomley, kasan-dev, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Will Deacon, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Kirill A. Shutemov,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	linux-kernel, Minchan Kim, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, Andrew Morton, linuxppc-dev, David S. Miller

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: kernel-janitors, linux-kernel, kernel-team, Michal Hocko,
	Julia Lawall, Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Daniel Colascione, Dave Hansen,
	David S. Miller, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Hugh Dickins, Ingo Molnar, James E. J. Bottomley,
	Jeff Dike, Jonas Bonn, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	pantin, Lokesh Gidra, Max Filippov, Minchan Kim, nios2-dev,
	openrisc, Peter Zijlstra, Richard Weinberger, Rich Felker,
	Sam Creasey, sparclinux, Stafford Horne, Stefan Kristiansson,
	Thomas Gleixner, Tony Luck, Will Deacon,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato, Kirill A. Shutemov, Andrew Morton

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, kernel-janitors, Michal Hocko,
	linux-mm, Lokesh Gidra, sparclinux, linux-riscv, kvmarm,
	Jonas Bonn, linux-s390, Daniel Colascione, Yoshinori Sato,
	Max Filippov, linux-hexagon, Helge Deller,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Hugh Dickins, James E. J. Bottomley, kasan-dev, Ingo Molnar,
	Geert Uytterhoeven, Andrey Ryabinin, linux-snps-arc, kernel-team,
	Sam Creasey, Fenghua Yu, Will Deacon, Jeff Dike, linux-um,
	Stefan Kristiansson, Julia Lawall, linux-m68k, openrisc,
	Borislav Petkov, Andy Lutomirski, nios2-dev, Kirill A. Shutemov,
	Stafford Horne, Guan Xuetao, linux-arm-kernel, Chris Zankel,
	Tony Luck, Richard Weinberger, linux-parisc, pantin,
	linux-kernel, Minchan Kim, Thomas Gleixner, linux-alpha,
	Ley Foon Tan, Andrew Morton, linuxppc-dev, David S. Miller

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: linux-snps-arc

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* [OpenRISC] [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions
@ 2018-10-13  9:22       ` SF Markus Elfring
  0 siblings, 0 replies; 317+ messages in thread
From: SF Markus Elfring @ 2018-10-13  9:22 UTC (permalink / raw)
  To: openrisc

>>> The changes were obtained by applying the following Coccinelle script.

How do you think about to adjust the order of provided information
in the commit description?
1. Update goals
2. Transformation implementation at the end


>> "^(?:pte_alloc(?:_one(?:_kernel)?)?|__pte_alloc(?:_kernel)?)$";
> 
> Sure it looks more clever, but why?

1. Usage of non-capturing parentheses
2. Clearer specification which parts can be treated as optional
   in the search pattern.


> Ugh that's harder to read and confusing.

* Do you care for coding style and execution speed of regular expressions?

* If you would prefer to list function names without placeholders,
  you can eventually specify them also within SmPL disjunctions directly.

* It can look simpler to use an identifier list as a constraint variant.
  http://coccinelle.lip6.fr/docs/main_grammar002.html


> Again this is confusing.

The view points can be different for such SmPL code.

 T3 fn(T1 E1
(
-           , T2 E2
|           , T2 E2
-           , T4 E4
)     );


> It makes one think that maybe the second argument can also be removed

You expressed this as the first transformation possibility, didn't you?

You would like to delete an argument from the end of a function
or macro parameter (or expression) list.
I suggest then again to avoid the SmPL specification of source code additions
(plus lines in the file difference format).


> and requires careful observation that the ");" follows.

Yes, of course.

Would you care more in the distinction which code parts should be kept unchanged?


> Right, I don't need it in this case.

Thanks for your understanding that the metavariable “position p”
can be deleted in the SmPL rule “pte_alloc_macro”.


> But the script works either way.

I imagine that you can become interested in a bit nicer run time characteristics.


> I like to take more of a problem solving approach that makes sense,

This is usual.


> than aiming for perfection,

If you will work more with scripts for the semantic patch language,
you might become used to additional coding variants.


> after all this is a useful script that we do not need to check
> in once we finish with it.

I am curious if there will evolve a need to add similar transformation approaches
to a known script collection.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle?id=79fc170b1f5c36f486d886bfbd59eb4e62321128

Would you eventually like to run such scripts once more?

Regards,
Markus

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-13  2:25                     ` Daniel Colascione
                                         ` (4 preceding siblings ...)
  (?)
@ 2018-10-13 17:50                       ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: dancol
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, Hugh Dickins, Lokesh Gidra, Michal Hocko,
	Andrew Morton, aryabinin, luto, bp, catalin.marinas,
	Chris Zankel, dave.hansen, elfring, fenghua.yu, geert, gxt,
	deller, mingo, jejb, jdike, Jonas Bonn, Julia Lawall, kasan-dev,
	kvmarm

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel
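
What Daniel and Joel converge on is a three-way, per-PMD dispatch inside
move_page_tables(): huge pmd, movable normal pmd, or the pte-by-pte
fallback. A simplified sketch of that step (argument lists abridged,
locals such as extent/need_flush taken from the surrounding loop, and
the compile-time switch named CONFIG_HAVE_MOVE_PMD as in later
revisions of the series -- treat the details as assumptions rather than
the exact upstream code):

    bool moved = false;

    if (pmd_trans_huge(*old_pmd)) {
            /* Huge PMD: the existing THP path moves it as one unit. */
            moved = move_huge_pmd(vma, old_addr, new_addr, old_end,
                                  old_pmd, new_pmd);
    } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && extent == PMD_SIZE) {
            /* Normal pmd covering a full PMD-sized, PMD-aligned extent:
             * relink the whole page-table page instead of 512 ptes. */
            moved = move_normal_pmd(vma, old_addr, new_addr, old_end,
                                    old_pmd, new_pmd, &need_flush);
    }

    if (!moved) {
            /* Fall back to the old pte-at-a-time copy. */
            move_ptes(vma, old_pmd, old_addr, old_addr + extent,
                      new_vma, new_pmd, new_addr, need_rmap_locks,
                      &need_flush);
    }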

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13 17:50                       ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: dancol
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, Hugh Dickins, Lokesh Gidra, Michal Hocko,
	Andrew Morton, aryabinin, luto, bp, catalin.marinas,
	Chris Zankel, dave.hansen, elfring, fenghua.yu, geert, gxt,
	deller, mingo, jejb, jdike, Jonas Bonn, Julia Lawall, kasan-dev,
	kvmarm

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13 17:50                       ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: dancol
  Cc: David Miller, kirill, linux-kernel, kernel-team, Minchan Kim,
	Ramon Pantin, Hugh Dickins, Lokesh Gidra, Michal Hocko,
	Andrew Morton, aryabinin, luto, bp, catalin.marinas,
	Chris Zankel, dave.hansen, elfring, fenghua.yu, geert, gxt,
	deller, mingo, jejb, jdike, Jonas Bonn, Julia Lawall, kasan-dev,
	kvmarm, lftan, linux-alpha, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	linux-xtensa, Max Filippov, nios2-dev, Peter Zijlstra, richard

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13 17:50                       ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: linux-riscv

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13 17:50                       ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: dancol
  Cc: linux-mips, linux-ia64, linux-sh, Peter Zijlstra,
	catalin.marinas, dave.hansen, Michal Hocko, linux-mm,
	Lokesh Gidra, linux-riscv, elfring, Jonas Bonn, linux-s390,
	linux-xtensa, linux-hexagon, deller, Hugh Dickins, jejb,
	kasan-dev, kvmarm, mingo, geert, aryabinin, linux-snps-arc,
	kernel-team, fenghua.yu, jdike, linux-um, Julia Lawall,
	linux-m68k, bp, luto, nios2-dev, kirill, gxt, Chris Zankel,
	richard, linux-parisc, Ramon Pantin, Max Filippov, linux-kernel,
	Minchan Kim, linux-alpha, lftan, Andrew Morton, linuxppc-dev,
	David Miller

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13 17:50                       ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: dancol
  Cc: linux-mips, linux-ia64, linux-sh, Peter Zijlstra,
	catalin.marinas, dave.hansen, Michal Hocko, linux-mm,
	Lokesh Gidra, linux-riscv, elfring, Jonas Bonn, linux-s390,
	linux-xtensa, linux-hexagon, deller, Hugh Dickins, jejb,
	kasan-dev, kvmarm, mingo, geert, aryabinin, linux-snps-arc,
	kernel-team, fenghua.yu, jdike, linux-um, Julia Lawall,
	linux-m68k, bp, luto, nios2-dev, kirill, gxt, Chris Zankel,
	richard, linux-parisc, Ramon Pantin, Max Filippov, linux-kernel,
	Minchan Kim, linux-alpha, lftan, Andrew Morton, linuxppc-dev,
	David Miller

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 317+ messages in thread

* [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-13 17:50                       ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-13 17:50 UTC (permalink / raw)
  To: linux-snps-arc

On Fri, Oct 12, 2018 at 07:25:08PM -0700, Daniel Colascione wrote:
[...] 
> > But anyway, I think this runtime detection thing is not needed. THP is
> > actually expected to be as fast as this anyway, so if that's available then
> > we should already be as fast.
> 
> Ah, I think the commit message is confusing. (Or else I'm misreading
> the patch now.) It's not quite that we're disabling the feature when
> THP is enabled anywhere, but rather that we use the move_huge_pmd path
> for huge PMDs and use the new code only for non-huge PMDs. (Right?) If
> that's the case, the commit message shouldn't say "Incase THP is
> enabled, the optimization is skipped". Even if THP is enabled on a
> system generally, we might use the new PMD-moving code for mapping
> types that don't support THP-ization, right?

That is true. Ok, I guess I can update the commit message to be more accurate
about that.

> > This is for non-THP where THP cannot be enabled
> > and there is still room for some improvement. Most/all architectures will be
> > just fine with this. This flag is more of a safety-net type of thing where in
> > the future if there is this one or two weird architectures that don't play
> > well, then they can turn it off at the architecture level by not selecting
> > the flag. See my latest patches for the per-architecture compile-time
> > controls. Ideally we'd like to blanket turn it on on all, but this is just
> > playing it extra safe as Kirill and me were discussing on other threads.
> 
> Sure. I'm just pointing out that the 500x performance different turns
> the operation into a qualitatively different feature, so if we expect
> to actually ship a mainstream architecture without support for this
> thing, we should make it explicit. If we're not, we shouldn't.

We can make it explicit by enabling it in such a mainstream architecture is
my point. Also if the optimization is not doing what its supposed to, then
userspace will also just know by measuring the time.

thanks,

 - Joel
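
As for userspace "just measuring the time": a minimal harness of the
sort that produces the "Total mremap time for 1GB data" figures quoted
in the patch description might look like the following. This is a
sketch only -- the thread does not include Joel's actual test program,
and for the PMD fast path to trigger, both the source and the
destination need to end up 2MB-aligned.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <time.h>

    #define SZ (1UL << 30)  /* 1GB */

    int main(void)
    {
            struct timespec t0, t1;
            void *src, *dst, *moved;

            src = mmap(NULL, SZ, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            dst = mmap(NULL, SZ, PROT_NONE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (src == MAP_FAILED || dst == MAP_FAILED)
                    return 1;
            memset(src, 1, SZ);     /* fault in all the page tables */

            clock_gettime(CLOCK_MONOTONIC, &t0);
            moved = mremap(src, SZ, SZ, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
            clock_gettime(CLOCK_MONOTONIC, &t1);
            if (moved == MAP_FAILED)
                    return 1;

            printf("Total mremap time for 1GB data: %lld nanoseconds.\n",
                   (long long)(t1.tv_sec - t0.tv_sec) * 1000000000LL +
                   (t1.tv_nsec - t0.tv_nsec));
            return 0;
    }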

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-12  1:37   ` Joel Fernandes (Google)
                       ` (5 preceding siblings ...)
  (?)
@ 2018-10-15  7:10     ` Christian Borntraeger
  -1 siblings, 0 replies; 317+ messages in thread
From: Christian Borntraeger @ 2018-10-15  7:10 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm, Ingo Molnar,
	Geert



On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> Android needs to mremap large regions of memory during memory management
> related operations. The mremap system call can be really slow if THP is
> not enabled. The bottleneck is move_page_tables, which is copying each
> pte at a time, and can be really slow across a large map. Turning on THP
> may not be a viable option, and is not for us. This patch speeds up the
> performance for non-THP systems by copying at the PMD level when possible.
> 
> The speed-up is three orders of magnitude. On a 1GB mremap, the mremap
> completion time drops from 160-250 milliseconds to 380-400 microseconds.
> 
> Before:
> Total mremap time for 1GB data: 242321014 nanoseconds.
> Total mremap time for 1GB data: 196842467 nanoseconds.
> Total mremap time for 1GB data: 167051162 nanoseconds.
> 
> After:
> Total mremap time for 1GB data: 385781 nanoseconds.
> Total mremap time for 1GB data: 388959 nanoseconds.
> Total mremap time for 1GB data: 402813 nanoseconds.
> 
> In case THP is enabled, the optimization is skipped. I also flush the
> TLB every time we do this optimization since I couldn't find a way to
> determine if the low-level PTEs are dirty. It is seen that the cost of
> doing so is small compared to the improvement, on both x86-64 and arm64.
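
(Editorial aside: checking the "500x" in the subject line against the three
runs quoted above gives roughly

	242321014 / 385781  ~ 628x
	196842467 / 388959  ~ 506x
	167051162 / 402813  ~ 415x

i.e. somewhere in the 400-630x range, consistent with the ~500x headline
figure.)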
> 
> Cc: minchan@kernel.org
> Cc: pantin@google.com
> Cc: hughd@google.com
> Cc: lokeshgidra@google.com
> Cc: dancol@google.com
> Cc: mhocko@kernel.org
> Cc: kirill@shutemov.name
> Cc: akpm@linux-foundation.org
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
> 
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 9e68a02a52b1..d82c485822ef 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>  		drop_rmap_locks(vma);
>  }
>  
> +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> +		  unsigned long new_addr, unsigned long old_end,
> +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> +{
> +	spinlock_t *old_ptl, *new_ptl;
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> +	    || old_end - old_addr < PMD_SIZE)
> +		return false;
> +
> +	/*
> +	 * The destination pmd shouldn't be established, free_pgtables()
> +	 * should have release it.
> +	 */
> +	if (WARN_ON(!pmd_none(*new_pmd)))
> +		return false;
> +
> +	/*
> +	 * We don't have to worry about the ordering of src and dst
> +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> +	 */
> +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> +	if (old_ptl) {
> +		pmd_t pmd;
> +
> +		new_ptl = pmd_lockptr(mm, new_pmd);
> +		if (new_ptl != old_ptl)
> +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> +
> +		/* Clear the pmd */
> +		pmd = *old_pmd;
> +		pmd_clear(old_pmd);

Adding Martin Schwidefsky.
Is this mapping maybe still in use on other CPUs? If yes, I think for
s390 we need to flush here as well (in other words, we might need to introduce
pmd_clear_flush). On s390 you have to use instructions like CRDTE, IPTE or IDTE
to modify page table entries that are still in use. Otherwise you can get a
delayed access exception which is - in contrast to page faults - not recoverable.
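
To make the pmd_clear_flush() idea concrete, a rough generic-code sketch is
below. The name and shape are purely illustrative (no such helper exists at
this point), and as noted above s390 would need to build it on CRDTE/IPTE/IDTE
rather than on a plain clear followed by a TLB flush:

/*
 * Illustrative sketch only, not an existing kernel interface:
 * read and clear the old pmd, and make sure stale translations are
 * gone before the page table page is re-attached elsewhere.
 */
static inline pmd_t pmd_clear_flush(struct vm_area_struct *vma,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	pmd_clear(pmdp);
	flush_tlb_range(vma, addr, addr + PMD_SIZE);
	return pmd;
}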



> +
> +		VM_BUG_ON(!pmd_none(*new_pmd));
> +
> +		/* Set the new pmd */
> +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> +		if (new_ptl != old_ptl)
> +			spin_unlock(new_ptl);
> +		spin_unlock(old_ptl);
> +
> +		*need_flush = true;
> +		return true;
> +	}
> +	return false;
> +}
> +
>  unsigned long move_page_tables(struct vm_area_struct *vma,
>  		unsigned long old_addr, struct vm_area_struct *new_vma,
>  		unsigned long new_addr, unsigned long len,
> @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>  			split_huge_pmd(vma, old_pmd, old_addr);
>  			if (pmd_trans_unstable(old_pmd))
>  				continue;
> +		} else if (extent == PMD_SIZE) {
> +			bool moved;
> +
> +			/* See comment in move_ptes() */
> +			if (need_rmap_locks)
> +				take_rmap_locks(vma);
> +			moved = move_normal_pmd(vma, old_addr, new_addr,
> +					old_end, old_pmd, new_pmd,
> +					&need_flush);
> +			if (need_rmap_locks)
> +				drop_rmap_locks(vma);
> +			if (moved)
> +				continue;
>  		}
> +
>  		if (pte_alloc(new_vma->vm_mm, new_pmd))
>  			break;
>  		next = (new_addr + PMD_SIZE) & PMD_MASK;
> 

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-15  7:10     ` Christian Borntraeger
                         ` (5 preceding siblings ...)
  (?)
@ 2018-10-15  8:18       ` Martin Schwidefsky
  -1 siblings, 0 replies; 317+ messages in thread
From: Martin Schwidefsky @ 2018-10-15  8:18 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, Joel Fernandes (Google),
	linux-riscv, elfring, Jonas Bonn, linux-s390, dancol,
	Yoshinori Sato, sparclinux, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm

On Mon, 15 Oct 2018 09:10:53 +0200
Christian Borntraeger <borntraeger@de.ibm.com> wrote:

> On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP systems by copying at the PMD level when possible.
> > 
> > The speed-up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > TLB every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is small compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >  		drop_rmap_locks(vma);
> >  }
> >  
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have release it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);  
> 
> Adding Martin Schwidefsky.
> Is this mapping maybe still in use on other CPUs? If yes, I think for
> s390 we need to flush here as well (in other words, we might need to introduce
> pmd_clear_flush). On s390 you have to use instructions like CRDTE, IPTE or IDTE
> to modify page table entries that are still in use. Otherwise you can get a 
> delayed access exception which is - in contrast to page faults - not recoverable.

Just clearing an active pmd would be broken for s390. We need the equivalent
of the ptep_get_and_clear() function for pmds. For s390 this function would
look like this:

static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
                                       unsigned long addr, pmd_t *pmdp)
{
        return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}

Just like pmdp_huge_get_and_clear() in fact.
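
As a sketch of what that would mean for the patch under discussion (untested,
just to show the shape of the change), the clear step in move_normal_pmd()
could use the existing pmdp_huge_get_and_clear() helper instead of the
open-coded read plus pmd_clear():

-		/* Clear the pmd */
-		pmd = *old_pmd;
-		pmd_clear(old_pmd);
+		/* Clear the pmd, letting the architecture invalidate the
+		 * old entry as required (s390 would use its IPTE/IDTE
+		 * based implementation of this helper). */
+		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);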

> 
> 
> 
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_unlock(new_ptl);
> > +		spin_unlock(old_ptl);
> > +
> > +		*need_flush = true;
> > +		return true;
> > +	}
> > +	return false;
> > +}
> > +

So the idea is to move the pmd entry to the new location, dragging
the whole pte table along with it, so the same table is now reached
through a different address. I wonder if that is safe in regard to
get_user_pages_fast().
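
For readers following along: get_user_pages_fast() walks the page tables
without taking mmap_sem or the page table locks, so the interleaving being
asked about is roughly the following (illustrative only, not a claim that any
particular architecture is broken):

/*
 *   CPU 0: mremap / move_normal_pmd()       CPU 1: get_user_pages_fast()
 *   ---------------------------------       ----------------------------
 *                                           pmd = READ_ONCE(*old_pmd);
 *   pmd = *old_pmd;
 *   pmd_clear(old_pmd);
 *   set_pmd_at(mm, new_addr, new_pmd, pmd);
 *                                           walk the pte table that the
 *                                           stale pmd value points to,
 *                                           even though that table is now
 *                                           attached at new_addr
 *
 * The pte page itself is not freed by the move, but whether every
 * architecture's lockless walk tolerates the table being re-attached at a
 * different address is exactly the question raised here.
 */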

> >  unsigned long move_page_tables(struct vm_area_struct *vma,
> >  		unsigned long old_addr, struct vm_area_struct *new_vma,
> >  		unsigned long new_addr, unsigned long len,
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> > +			bool moved;
> > +
> > +			/* See comment in move_ptes() */
> > +			if (need_rmap_locks)
> > +				take_rmap_locks(vma);
> > +			moved = move_normal_pmd(vma, old_addr, new_addr,
> > +					old_end, old_pmd, new_pmd,
> > +					&need_flush);
> > +			if (need_rmap_locks)
> > +				drop_rmap_locks(vma);
> > +			if (moved)
> > +				continue;
> >  		}
> > +
> >  		if (pte_alloc(new_vma->vm_mm, new_pmd))
> >  			break;
> >  		next = (new_addr + PMD_SIZE) & PMD_MASK;
> >   

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-15  8:18       ` Martin Schwidefsky
  0 siblings, 0 replies; 317+ messages in thread
From: Martin Schwidefsky @ 2018-10-15  8:18 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: Joel Fernandes (Google),
	linux-kernel, kernel-team, minchan, pantin, hughd, lokeshgidra,
	dancol, mhocko, kirill, akpm, Andrey Ryabinin, Andy Lutomirski,
	Borislav Petkov, Catalin Marinas, Chris Zankel, Dave Hansen,
	David S. Miller, elfring, Fenghua Yu, Geert Uytterhoeven,
	Guan Xuetao, Helge Deller, Ingo Molnar, James E.J. Bottomley,
	Jeff Dike, Jonas Bonn, Julia Lawall, kasan-dev, kvmarm,
	Ley Foon Tan, linux-alpha, linux-arm-kernel, linux-hexagon,
	linux-ia64, linux-m68k, linux-mips, linux-mm, linux-parisc,
	linuxppc-dev, linux-riscv, linux-s390, linux-sh, linux-snps-arc,
	linux-um, linux-xtensa, Max Filippov, nios2-dev, openrisc,
	Peter Zijlstra, Richard Weinberger, Rich Felker, Sam Creasey,
	sparclinux, Stafford Horne, Stefan Kristiansson, Thomas Gleixner,
	Tony Luck, Will Deacon,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato

On Mon, 15 Oct 2018 09:10:53 +0200
Christian Borntraeger <borntraeger@de.ibm.com> wrote:

> On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> > Android needs to mremap large regions of memory during memory management
> > related operations. The mremap system call can be really slow if THP is
> > not enabled. The bottleneck is move_page_tables, which is copying each
> > pte at a time, and can be really slow across a large map. Turning on THP
> > may not be a viable option, and is not for us. This patch speeds up the
> > performance for non-THP systems by copying at the PMD level when possible.
> > 
> > The speed-up is three orders of magnitude. On a 1GB mremap, the mremap
> > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > 
> > Before:
> > Total mremap time for 1GB data: 242321014 nanoseconds.
> > Total mremap time for 1GB data: 196842467 nanoseconds.
> > Total mremap time for 1GB data: 167051162 nanoseconds.
> > 
> > After:
> > Total mremap time for 1GB data: 385781 nanoseconds.
> > Total mremap time for 1GB data: 388959 nanoseconds.
> > Total mremap time for 1GB data: 402813 nanoseconds.
> > 
> > In case THP is enabled, the optimization is skipped. I also flush the
> > TLB every time we do this optimization since I couldn't find a way to
> > determine if the low-level PTEs are dirty. It is seen that the cost of
> > doing so is small compared to the improvement, on both x86-64 and arm64.
> > 
> > Cc: minchan@kernel.org
> > Cc: pantin@google.com
> > Cc: hughd@google.com
> > Cc: lokeshgidra@google.com
> > Cc: dancol@google.com
> > Cc: mhocko@kernel.org
> > Cc: kirill@shutemov.name
> > Cc: akpm@linux-foundation.org
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 62 insertions(+)
> > 
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 9e68a02a52b1..d82c485822ef 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> >  		drop_rmap_locks(vma);
> >  }
> >  
> > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > +		  unsigned long new_addr, unsigned long old_end,
> > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > +{
> > +	spinlock_t *old_ptl, *new_ptl;
> > +	struct mm_struct *mm = vma->vm_mm;
> > +
> > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > +	    || old_end - old_addr < PMD_SIZE)
> > +		return false;
> > +
> > +	/*
> > +	 * The destination pmd shouldn't be established, free_pgtables()
> > +	 * should have release it.
> > +	 */
> > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > +		return false;
> > +
> > +	/*
> > +	 * We don't have to worry about the ordering of src and dst
> > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > +	 */
> > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > +	if (old_ptl) {
> > +		pmd_t pmd;
> > +
> > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > +
> > +		/* Clear the pmd */
> > +		pmd = *old_pmd;
> > +		pmd_clear(old_pmd);  
> 
> Adding Martin Schwidefsky.
> Is this mapping maybe still in use on other CPUs? If yes, I think for
> s390 we need to flush here as well (in other words we might need to introduce
> pmd_clear_flush). On s390 you have to use instructions like CRDTE, IPTE or IDTE
> to modify page table entries that are still in use. Otherwise you can get a 
> delayed access exception which is - in contrast to page faults - not recoverable.

Just clearing an active pmd would be broken for s390. We need the equivalent
of the ptep_get_and_clear() function for pmds. For s390 this function would
look like this:

static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
                                       unsigned long addr, pmd_t *pmdp)
{
        return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}

Just like pmdp_huge_get_and_clear() in fact.
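
For illustration only (not part of the posted patch): if a generic
pmdp_get_and_clear() helper along those lines existed, the clear/set
sequence in move_normal_pmd() could look roughly like the sketch below,
letting each architecture supply whatever invalidation its hardware
requires. The availability of such a helper outside s390 is an assumption
on my part:

        /* Atomically clear the source pmd; an architecture such as s390
         * can implement the helper with CRDTE/IPTE/IDTE semantics. */
        pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);

        VM_BUG_ON(!pmd_none(*new_pmd));

        /* Re-install the saved entry at the destination address. */
        set_pmd_at(mm, new_addr, new_pmd, pmd);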

> 
> 
> 
> > +
> > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > +
> > +		/* Set the new pmd */
> > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> > +		if (new_ptl != old_ptl)
> > +			spin_unlock(new_ptl);
> > +		spin_unlock(old_ptl);
> > +
> > +		*need_flush = true;
> > +		return true;
> > +	}
> > +	return false;
> > +}
> > +

So the idea is to move the pmd entry to the new location, dragging
the whole pte table to a new location with a different address.
I wonder if that is safe in regard to get_user_pages_fast().

> >  unsigned long move_page_tables(struct vm_area_struct *vma,
> >  		unsigned long old_addr, struct vm_area_struct *new_vma,
> >  		unsigned long new_addr, unsigned long len,
> > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  			split_huge_pmd(vma, old_pmd, old_addr);
> >  			if (pmd_trans_unstable(old_pmd))
> >  				continue;
> > +		} else if (extent == PMD_SIZE) {
> > +			bool moved;
> > +
> > +			/* See comment in move_ptes() */
> > +			if (need_rmap_locks)
> > +				take_rmap_locks(vma);
> > +			moved = move_normal_pmd(vma, old_addr, new_addr,
> > +					old_end, old_pmd, new_pmd,
> > +					&need_flush);
> > +			if (need_rmap_locks)
> > +				drop_rmap_locks(vma);
> > +			if (moved)
> > +				continue;
> >  		}
> > +
> >  		if (pte_alloc(new_vma->vm_mm, new_pmd))
> >  			break;
> >  		next = (new_addr + PMD_SIZE) & PMD_MASK;
> >   

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
  2018-10-15  8:18       ` Martin Schwidefsky
                           ` (5 preceding siblings ...)
  (?)
@ 2018-10-16  2:08         ` Joel Fernandes
  -1 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-16  2:08 UTC (permalink / raw)
  To: Martin Schwidefsky
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm,
	Christian Borntraeger

On Mon, Oct 15, 2018 at 10:18:14AM +0200, Martin Schwidefsky wrote:
> On Mon, 15 Oct 2018 09:10:53 +0200
> Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> 
> > On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> > > Android needs to mremap large regions of memory during memory management
> > > related operations. The mremap system call can be really slow if THP is
> > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > pte at a time, and can be really slow across a large map. Turning on THP
> > > may not be a viable option, and is not for us. This patch speeds up the
> > > performance for non-THP systems by copying at the PMD level when possible.
> > > 
> > > The speed-up is three orders of magnitude. On a 1GB mremap, the mremap
> > > completion time drops from 160-250 milliseconds to 380-400 microseconds.
> > > 
> > > Before:
> > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > 
> > > After:
> > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > 
> > > In case THP is enabled, the optimization is skipped. I also flush the
> > > TLB every time we do this optimization since I couldn't find a way to
> > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > doing so is not much compared to the improvement, on both x86-64 and arm64.
> > > 
> > > Cc: minchan@kernel.org
> > > Cc: pantin@google.com
> > > Cc: hughd@google.com
> > > Cc: lokeshgidra@google.com
> > > Cc: dancol@google.com
> > > Cc: mhocko@kernel.org
> > > Cc: kirill@shutemov.name
> > > Cc: akpm@linux-foundation.org
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > >  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 62 insertions(+)
> > > 
> > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > index 9e68a02a52b1..d82c485822ef 100644
> > > --- a/mm/mremap.c
> > > +++ b/mm/mremap.c
> > > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> > >  		drop_rmap_locks(vma);
> > >  }
> > >  
> > > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > > +		  unsigned long new_addr, unsigned long old_end,
> > > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > +{
> > > +	spinlock_t *old_ptl, *new_ptl;
> > > +	struct mm_struct *mm = vma->vm_mm;
> > > +
> > > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > +	    || old_end - old_addr < PMD_SIZE)
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * The destination pmd shouldn't be established, free_pgtables()
> > > +	 * should have release it.
> > > +	 */
> > > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * We don't have to worry about the ordering of src and dst
> > > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > > +	 */
> > > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > +	if (old_ptl) {
> > > +		pmd_t pmd;
> > > +
> > > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > +
> > > +		/* Clear the pmd */
> > > +		pmd = *old_pmd;
> > > +		pmd_clear(old_pmd);  
> > 
> > Adding Martin Schwidefsky.
> > Is this mapping maybe still in use on other CPUs? If yes, I think for
> > s390 we need to flush here as well (in other words we might need to introduce
> > pmd_clear_flush). On s390 you have to use instructions like CRDTE, IPTE or IDTE
> > to modify page table entries that are still in use. Otherwise you can get a 
> > delayed access exception which is - in contrast to page faults - not recoverable.
> 
> Just clearing an active pmd would be broken for s390. We need the equivalent
> of the ptep_get_and_clear() function for pmds. For s390 this function would
> look like this:
> 
> static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
>                                        unsigned long addr, pmd_t *pmdp)
> {
>         return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
> }
> 
> Just like pmdp_huge_get_and_clear() in fact.

I agree architectures like s390 may need additional explicit instructions to
avoid any unrecoverable failure. The good news is that in the last patch I sent,
I have put this behind an architecture flag (HAVE_MOVE_PMD), so we don't have
to enable it on architectures that cannot handle it:
https://www.spinics.net/lists/linux-mm/msg163621.html
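
One way to picture the opt-in (my sketch, not necessarily how the posted
patch wires it up): architectures known to be safe select a HAVE_MOVE_PMD
Kconfig symbol, and the PMD-level path in move_page_tables() is guarded by
it, e.g.:

		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
			   extent == PMD_SIZE) {
			bool moved;

			/* PMD-level move only on architectures that opted
			 * in; s390 would simply not select the symbol. */

Everything else stays on the existing per-pte path.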

Also, we trigger this optimization only if the page is not a transparent
huge page, which we check by calling pmd_trans_huge(). For regular pages, it
should be safe not to do the atomic get_and_clear, AIUI, because Linux doesn't
use any bits in the PMD, such as the dirty bit, when THP is not in use (and the
processors I looked at (not s390) should not be storing anything in those bits
anyway when the page is not a huge page). I have gone through various scenarios
and read both the arm 32-bit and 64-bit manuals and the x86 64-bit manual, and
I believe it to be safe.

For s390, let's not set the HAVE_MOVE_PMD flag. Does that work for you?

> > > +
> > > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > > +
> > > +		/* Set the new pmd */
> > > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_unlock(new_ptl);
> > > +		spin_unlock(old_ptl);
> > > +
> > > +		*need_flush = true;
> > > +		return true;
> > > +	}
> > > +	return false;
> > > +}
> > > +
> 
> So the idea is to move the pmd entry to the new location, dragging
> the whole pte table to a new location with a different address.
> I wonder if that is safe in regard to get_user_pages_fast().

Could you elaborate why you feel it may not be?

Are you concerned that the PMD moving interferes with the page walk? In case
the tree changes during page-walking, the number of pages pinned by
get_user_pages_fast may be less than the number requested. In this case,
get_user_pages_fast would fall back to the slow path which should be
synchronized with the mremap by courtesy of the mm->mmap_sem. But please let
me know the scenario you have in mind and if I missed something.
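
To make that fallback concrete, here is a hypothetical caller-side helper
(my sketch, using the current get_user_pages_fast() and
get_user_pages_unlocked() prototypes) showing why a short count from the
fast walk is harmless:

static long pin_buffer_pages(unsigned long start, int nr_pages,
			     struct page **pages)
{
	/* Lockless fast walk; it may pin fewer pages than requested if
	 * it races with something like mremap() moving the page tables. */
	int nr = get_user_pages_fast(start, nr_pages, 0, pages);

	if (nr == nr_pages)
		return nr;
	if (nr < 0)
		nr = 0;
	/*
	 * Slow path: takes mmap_sem for read, so it serializes against
	 * mremap(), which holds mmap_sem for write while moving pmds.
	 */
	return nr + get_user_pages_unlocked(start + ((unsigned long)nr << PAGE_SHIFT),
					    nr_pages - nr, pages + nr, 0);
}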

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-16  2:08         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-16  2:08 UTC (permalink / raw)
  To: Martin Schwidefsky
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm,
	Christian Borntraeger

On Mon, Oct 15, 2018 at 10:18:14AM +0200, Martin Schwidefsky wrote:
> On Mon, 15 Oct 2018 09:10:53 +0200
> Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> 
> > On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> > > Android needs to mremap large regions of memory during memory management
> > > related operations. The mremap system call can be really slow if THP is
> > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > pte at a time, and can be really slow across a large map. Turning on THP
> > > may not be a viable option, and is not for us. This patch speeds up the
> > > performance for non-THP system by copying at the PMD level when possible.
> > > 
> > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > > 
> > > Before:
> > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > 
> > > After:
> > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > 
> > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > tlb every time we do this optimization since I couldn't find a way to
> > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > doing so is not much compared the improvement, on both x86-64 and arm64.
> > > 
> > > Cc: minchan@kernel.org
> > > Cc: pantin@google.com
> > > Cc: hughd@google.com
> > > Cc: lokeshgidra@google.com
> > > Cc: dancol@google.com
> > > Cc: mhocko@kernel.org
> > > Cc: kirill@shutemov.name
> > > Cc: akpm@linux-foundation.org
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > >  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 62 insertions(+)
> > > 
> > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > index 9e68a02a52b1..d82c485822ef 100644
> > > --- a/mm/mremap.c
> > > +++ b/mm/mremap.c
> > > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> > >  		drop_rmap_locks(vma);
> > >  }
> > >  
> > > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > > +		  unsigned long new_addr, unsigned long old_end,
> > > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > +{
> > > +	spinlock_t *old_ptl, *new_ptl;
> > > +	struct mm_struct *mm = vma->vm_mm;
> > > +
> > > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > +	    || old_end - old_addr < PMD_SIZE)
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * The destination pmd shouldn't be established, free_pgtables()
> > > +	 * should have release it.
> > > +	 */
> > > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * We don't have to worry about the ordering of src and dst
> > > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > > +	 */
> > > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > +	if (old_ptl) {
> > > +		pmd_t pmd;
> > > +
> > > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > +
> > > +		/* Clear the pmd */
> > > +		pmd = *old_pmd;
> > > +		pmd_clear(old_pmd);  
> > 
> > Adding Martin Schwidefsky.
> > Is this mapping maybe still in use on other CPUs? If yes, I think for
> > s390 we need to flush here as well (in other word we might need to introduce
> > pmd_clear_flush). On s390 you have to use instructions like CRDTE,IPTE or IDTE
> > to modify page table entries that are still in use. Otherwise you can get a 
> > delayed access exception which is - in contrast to page faults - not recoverable.
> 
> Just clearing an active pmd would be broken for s390. We need the equivalent
> of the ptep_get_and_clear() function for pmds. For s390 this function would
> look like this:
> 
> static inline pte_t pmdp_get_and_clear(struct mm_struct *mm,
>                                        unsigned long addr, pmd_t *pmdp)
> {
>         return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
> }
> 
> Just like pmdp_huge_get_and_clear() in fact.

I agree architecture like s390 may need additional explicit instructions to
avoid any unrecoverable failure. So the good news is in my last patch I sent, I
have put this behind an architecture flag (HAVE_MOVE_PMD), so we don't have
to enable it with architectures that cannot handle it:
https://www.spinics.net/lists/linux-mm/msg163621.html

Also we are triggering this optimization only if the page is not a transparent
huge page by calling pmd_trans_huge(). For regular pages, it should be safe to
not do the atomic get_and_clear AIUI because Linux doesn't use any bits from
the PMD like the dirty bit if THP is not in use (and the processors that I
saw (not s390) should not storing anything in the bits anyway when the page
is not a huge page. I have gone through various scenarios and read both arm
32-bit and 64-bit and x86 64-bit manuals, and I believe it to be safe.

For s390, lets not set the HAVE_MOVE_PMD flag. Does that work for you?

> > > +
> > > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > > +
> > > +		/* Set the new pmd */
> > > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_unlock(new_ptl);
> > > +		spin_unlock(old_ptl);
> > > +
> > > +		*need_flush = true;
> > > +		return true;
> > > +	}
> > > +	return false;
> > > +}
> > > +
> 
> So the idea is to move the pmd entry to the new location, dragging
> the whole pte table to a new location with a different address.
> I wonder if that is safe in regard to get_user_pages_fast().

Could you elaborate why you feel it may not be?

Are you concerned that the PMD moving interferes with the page walk? Incase
the tree changes during page-walking, the number of pages pinned by
get_user_pages_fast may be less than the number requested. In this case,
get_user_pages_fast would fall back to the slow path which should be
synchronized with the mremap by courtesy of the mm->mmap_sem. But please let
me know the scenario you have in mind and if I missed something.

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-16  2:08         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-16  2:08 UTC (permalink / raw)
  To: Martin Schwidefsky
  Cc: Christian Borntraeger, linux-kernel, kernel-team, minchan,
	pantin, hughd, lokeshgidra, dancol, mhocko, kirill, akpm,
	Andrey Ryabinin, Andy Lutomirski, Borislav Petkov,
	Catalin Marinas, Chris Zankel, Dave Hansen, David S. Miller,
	elfring, Fenghua Yu, Geert Uytterhoeven, Guan Xuetao,
	Helge Deller, Ingo Molnar, James E.J. Bottomley, Jeff Dike,
	Jonas Bonn, Julia Lawall, kasan-dev, kvmarm, Ley Foon Tan,
	linux-alpha, linux-arm-kernel, linux-hexagon, linux-ia64,
	linux-m68k, linux-mips, linux-mm, linux-parisc, linuxppc-dev,
	linux-riscv, linux-s390, linux-sh, linux-snps-arc, linux-um,
	linux-xtensa, Max Filippov, nios2-dev, openrisc, Peter Zijlstra,
	Richard Weinberger, Rich Felker, Sam Creasey, sparclinux,
	Stafford Horne, Stefan Kristiansson, Thomas Gleixner, Tony Luck,
	Will Deacon, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Yoshinori Sato

On Mon, Oct 15, 2018 at 10:18:14AM +0200, Martin Schwidefsky wrote:
> On Mon, 15 Oct 2018 09:10:53 +0200
> Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> 
> > On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> > > Android needs to mremap large regions of memory during memory management
> > > related operations. The mremap system call can be really slow if THP is
> > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > pte at a time, and can be really slow across a large map. Turning on THP
> > > may not be a viable option, and is not for us. This patch speeds up the
> > > performance for non-THP system by copying at the PMD level when possible.
> > > 
> > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > > 
> > > Before:
> > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > 
> > > After:
> > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > 
> > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > tlb every time we do this optimization since I couldn't find a way to
> > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > doing so is not much compared the improvement, on both x86-64 and arm64.
> > > 
> > > Cc: minchan@kernel.org
> > > Cc: pantin@google.com
> > > Cc: hughd@google.com
> > > Cc: lokeshgidra@google.com
> > > Cc: dancol@google.com
> > > Cc: mhocko@kernel.org
> > > Cc: kirill@shutemov.name
> > > Cc: akpm@linux-foundation.org
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > >  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 62 insertions(+)
> > > 
> > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > index 9e68a02a52b1..d82c485822ef 100644
> > > --- a/mm/mremap.c
> > > +++ b/mm/mremap.c
> > > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> > >  		drop_rmap_locks(vma);
> > >  }
> > >  
> > > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > > +		  unsigned long new_addr, unsigned long old_end,
> > > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > +{
> > > +	spinlock_t *old_ptl, *new_ptl;
> > > +	struct mm_struct *mm = vma->vm_mm;
> > > +
> > > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > +	    || old_end - old_addr < PMD_SIZE)
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * The destination pmd shouldn't be established, free_pgtables()
> > > +	 * should have release it.
> > > +	 */
> > > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * We don't have to worry about the ordering of src and dst
> > > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > > +	 */
> > > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > +	if (old_ptl) {
> > > +		pmd_t pmd;
> > > +
> > > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > +
> > > +		/* Clear the pmd */
> > > +		pmd = *old_pmd;
> > > +		pmd_clear(old_pmd);  
> > 
> > Adding Martin Schwidefsky.
> > Is this mapping maybe still in use on other CPUs? If yes, I think for
> > s390 we need to flush here as well (in other word we might need to introduce
> > pmd_clear_flush). On s390 you have to use instructions like CRDTE,IPTE or IDTE
> > to modify page table entries that are still in use. Otherwise you can get a 
> > delayed access exception which is - in contrast to page faults - not recoverable.
> 
> Just clearing an active pmd would be broken for s390. We need the equivalent
> of the ptep_get_and_clear() function for pmds. For s390 this function would
> look like this:
> 
> static inline pte_t pmdp_get_and_clear(struct mm_struct *mm,
>                                        unsigned long addr, pmd_t *pmdp)
> {
>         return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
> }
> 
> Just like pmdp_huge_get_and_clear() in fact.

I agree architecture like s390 may need additional explicit instructions to
avoid any unrecoverable failure. So the good news is in my last patch I sent, I
have put this behind an architecture flag (HAVE_MOVE_PMD), so we don't have
to enable it with architectures that cannot handle it:
https://www.spinics.net/lists/linux-mm/msg163621.html

Also we are triggering this optimization only if the page is not a transparent
huge page by calling pmd_trans_huge(). For regular pages, it should be safe to
not do the atomic get_and_clear AIUI because Linux doesn't use any bits from
the PMD like the dirty bit if THP is not in use (and the processors that I
saw (not s390) should not storing anything in the bits anyway when the page
is not a huge page. I have gone through various scenarios and read both arm
32-bit and 64-bit and x86 64-bit manuals, and I believe it to be safe.

For s390, lets not set the HAVE_MOVE_PMD flag. Does that work for you?

> > > +
> > > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > > +
> > > +		/* Set the new pmd */
> > > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_unlock(new_ptl);
> > > +		spin_unlock(old_ptl);
> > > +
> > > +		*need_flush = true;
> > > +		return true;
> > > +	}
> > > +	return false;
> > > +}
> > > +
> 
> So the idea is to move the pmd entry to the new location, dragging
> the whole pte table to a new location with a different address.
> I wonder if that is safe in regard to get_user_pages_fast().

Could you elaborate on why you feel it may not be?

Are you concerned that moving the PMD interferes with the page walk? If the
page tables change during the walk, the number of pages pinned by
get_user_pages_fast may be less than the number requested. In that case,
get_user_pages_fast falls back to the slow path, which is synchronized with
the mremap by virtue of the mm->mmap_sem. But please let me know the scenario
you have in mind, in case I missed something.
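
For reference, a simplified sketch of that fallback pattern (this is not the
literal mm/gup.c code; gup_walk_lockless() is a made-up stand-in for the
interrupt-disabled page-table walk, while get_user_pages_unlocked() is the
real slow path that takes mmap_sem):

	int nr_pinned, ret;

	local_irq_disable();
	nr_pinned = gup_walk_lockless(start, nr_pages, write, pages); /* hypothetical */
	local_irq_enable();

	ret = nr_pinned;
	if (nr_pinned < nr_pages) {
		/*
		 * Slow path: takes mm->mmap_sem for reading, so it cannot
		 * run concurrently with an mremap() that holds it for write.
		 */
		ret = get_user_pages_unlocked(start + (nr_pinned << PAGE_SHIFT),
					      nr_pages - nr_pinned,
					      pages + nr_pinned, gup_flags);
		if (ret >= 0)
			ret += nr_pinned;
	}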

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 317+ messages in thread

* Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
@ 2018-10-16  2:08         ` Joel Fernandes
  0 siblings, 0 replies; 317+ messages in thread
From: Joel Fernandes @ 2018-10-16  2:08 UTC (permalink / raw)
  To: Martin Schwidefsky
  Cc: linux-mips, Rich Felker, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, mhocko, linux-mm,
	lokeshgidra, sparclinux, linux-riscv, elfring, Jonas Bonn,
	linux-s390, dancol, Yoshinori Sato, linux-xtensa, linux-hexagon,
	Helge Deller, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	hughd, James E.J. Bottomley, kasan-dev, kvmarm,
	Christian Borntraeger, Ingo Molnar, Geert Uytterhoeven,
	Andrey Ryabinin, linux-snps-arc, kernel-team, Sam Creasey,
	Fenghua Yu, Jeff Dike, linux-um, Stefan Kristiansson,
	Julia Lawall, linux-m68k, openrisc, Borislav Petkov,
	Andy Lutomirski, nios2-dev, kirill, Stafford Horne, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Tony Luck, Richard Weinberger,
	linux-parisc, pantin, Max Filippov, linux-kernel, minchan,
	Thomas Gleixner, linux-alpha, Ley Foon Tan, akpm, linuxppc-dev,
	David S. Miller

On Mon, Oct 15, 2018 at 10:18:14AM +0200, Martin Schwidefsky wrote:
> On Mon, 15 Oct 2018 09:10:53 +0200
> Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> 
> > On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote:
> > > Android needs to mremap large regions of memory during memory management
> > > related operations. The mremap system call can be really slow if THP is
> > > not enabled. The bottleneck is move_page_tables, which is copying each
> > > pte at a time, and can be really slow across a large map. Turning on THP
> > > may not be a viable option, and is not for us. This patch speeds up the
> > > performance for non-THP system by copying at the PMD level when possible.
> > > 
> > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap
> > > completion times drops from 160-250 millesconds to 380-400 microseconds.
> > > 
> > > Before:
> > > Total mremap time for 1GB data: 242321014 nanoseconds.
> > > Total mremap time for 1GB data: 196842467 nanoseconds.
> > > Total mremap time for 1GB data: 167051162 nanoseconds.
> > > 
> > > After:
> > > Total mremap time for 1GB data: 385781 nanoseconds.
> > > Total mremap time for 1GB data: 388959 nanoseconds.
> > > Total mremap time for 1GB data: 402813 nanoseconds.
> > > 
> > > Incase THP is enabled, the optimization is skipped. I also flush the
> > > tlb every time we do this optimization since I couldn't find a way to
> > > determine if the low-level PTEs are dirty. It is seen that the cost of
> > > doing so is not much compared the improvement, on both x86-64 and arm64.
> > > 
> > > Cc: minchan@kernel.org
> > > Cc: pantin@google.com
> > > Cc: hughd@google.com
> > > Cc: lokeshgidra@google.com
> > > Cc: dancol@google.com
> > > Cc: mhocko@kernel.org
> > > Cc: kirill@shutemov.name
> > > Cc: akpm@linux-foundation.org
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > >  mm/mremap.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 62 insertions(+)
> > > 
> > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > index 9e68a02a52b1..d82c485822ef 100644
> > > --- a/mm/mremap.c
> > > +++ b/mm/mremap.c
> > > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
> > >  		drop_rmap_locks(vma);
> > >  }
> > >  
> > > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> > > +		  unsigned long new_addr, unsigned long old_end,
> > > +		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
> > > +{
> > > +	spinlock_t *old_ptl, *new_ptl;
> > > +	struct mm_struct *mm = vma->vm_mm;
> > > +
> > > +	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
> > > +	    || old_end - old_addr < PMD_SIZE)
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * The destination pmd shouldn't be established, free_pgtables()
> > > +	 * should have release it.
> > > +	 */
> > > +	if (WARN_ON(!pmd_none(*new_pmd)))
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * We don't have to worry about the ordering of src and dst
> > > +	 * ptlocks because exclusive mmap_sem prevents deadlock.
> > > +	 */
> > > +	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
> > > +	if (old_ptl) {
> > > +		pmd_t pmd;
> > > +
> > > +		new_ptl = pmd_lockptr(mm, new_pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
> > > +
> > > +		/* Clear the pmd */
> > > +		pmd = *old_pmd;
> > > +		pmd_clear(old_pmd);  
> > 
> > Adding Martin Schwidefsky.
> > Is this mapping maybe still in use on other CPUs? If yes, I think for
> > s390 we need to flush here as well (in other word we might need to introduce
> > pmd_clear_flush). On s390 you have to use instructions like CRDTE,IPTE or IDTE
> > to modify page table entries that are still in use. Otherwise you can get a 
> > delayed access exception which is - in contrast to page faults - not recoverable.
> 
> Just clearing an active pmd would be broken for s390. We need the equivalent
> of the ptep_get_and_clear() function for pmds. For s390 this function would
> look like this:
> 
> static inline pte_t pmdp_get_and_clear(struct mm_struct *mm,
>                                        unsigned long addr, pmd_t *pmdp)
> {
>         return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
> }
> 
> Just like pmdp_huge_get_and_clear() in fact.

I agree an architecture like s390 may need additional explicit instructions to
avoid any unrecoverable failure. The good news is that in the last patch I sent,
I put this behind an architecture flag (HAVE_MOVE_PMD), so we don't have to
enable it on architectures that cannot handle it:
https://www.spinics.net/lists/linux-mm/msg163621.html

Also, we trigger this optimization only if the page is not a transparent huge
page, which we check with pmd_trans_huge(). For regular pages, it should be
safe to skip the atomic get_and_clear AIUI, because Linux doesn't use any bits
in the PMD, such as the dirty bit, when THP is not in use (and the processors
I looked at (not s390) should not store anything in those bits anyway when the
page is not a huge page). I have gone through various scenarios and read the
arm 32-bit, arm 64-bit and x86 64-bit manuals, and I believe it to be safe.

For s390, let's not set the HAVE_MOVE_PMD flag. Does that work for you?
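
Purely as an illustration of what opting in later would take, the clear step
in move_normal_pmd() could be routed through a get-and-clear style helper
along the lines Martin sketches above, so that an architecture like s390 can
use the instructions it requires. Note that a generic non-huge
pmdp_get_and_clear() does not exist today and is assumed here:

	pmd_t pmd;

	/*
	 * Hypothetical helper; each architecture would implement it with
	 * whatever instruction it needs (e.g. the pmdp_xchg_lazy() based
	 * version shown above for s390).
	 */
	pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);

	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Re-install the entry, now pointing at the same PTE table */
	set_pmd_at(mm, new_addr, new_pmd, pmd);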

> > > +
> > > +		VM_BUG_ON(!pmd_none(*new_pmd));
> > > +
> > > +		/* Set the new pmd */
> > > +		set_pmd_at(mm, new_addr, new_pmd, pmd);
> > > +		if (new_ptl != old_ptl)
> > > +			spin_unlock(new_ptl);
> > > +		spin_unlock(old_ptl);
> > > +
> > > +		*need_flush = true;
> > > +		return true;
> > > +	}
> > > +	return false;
> > > +}
> > > +
> 
> So the idea is to move the pmd entry to the new location, dragging
> the whole pte table to a new location with a different address.
> I wonder if that is safe in regard to get_user_pages_fast().

Could you elaborate on why you feel it may not be?

Are you concerned that moving the PMD interferes with the page walk? If the
page tables change during the walk, the number of pages pinned by
get_user_pages_fast may be less than the number requested. In that case,
get_user_pages_fast falls back to the slow path, which is synchronized with
the mremap by virtue of the mm->mmap_sem. But please let me know the scenario
you have in mind, in case I missed something.

thanks,

 - Joel



^ permalink raw reply	[flat|nested] 317+ messages in thread

end of thread, other threads:[~2018-10-16  3:30 UTC | newest]

Thread overview: 317+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-12  1:37 [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions Joel Fernandes (Google)
2018-10-12  1:37 ` [PATCH v2 2/2] mm: speed up mremap by 500x on large regions Joel Fernandes (Google)
2018-10-12  6:40   ` Anton Ivanov
2018-10-12 11:30   ` Kirill A. Shutemov
2018-10-12 11:36     ` Kirill A. Shutemov
2018-10-12 12:50     ` Joel Fernandes
2018-10-12 13:19       ` Kirill A. Shutemov
2018-10-12 16:57         ` Joel Fernandes
2018-10-12 21:33           ` Kirill A. Shutemov
2018-10-12 18:18       ` David Miller
2018-10-13  1:35         ` Joel Fernandes
2018-10-13  1:39           ` Daniel Colascione
2018-10-13  1:44             ` Joel Fernandes
2018-10-13  1:54               ` Daniel Colascione
2018-10-13  2:10                 ` Joel Fernandes
2018-10-13  2:25                   ` Daniel Colascione
2018-10-13 17:50                     ` Joel Fernandes
2018-10-12 18:02     ` David Miller
2018-10-12 14:09   ` Anton Ivanov
2018-10-12 14:37     ` Kirill A. Shutemov
2018-10-12 14:48       ` Anton Ivanov
2018-10-12 16:42         ` Anton Ivanov
2018-10-12 16:50           ` Joel Fernandes
2018-10-12 16:58             ` Anton Ivanov
2018-10-12 17:06               ` Joel Fernandes
2018-10-12 21:40           ` Kirill A. Shutemov
2018-10-13  6:10             ` Anton Ivanov
2018-10-15  7:10   ` Christian Borntraeger
2018-10-15  8:18     ` Martin Schwidefsky
2018-10-16  2:08       ` Joel Fernandes
2018-10-12 11:09 ` [PATCH v2 1/2] treewide: remove unused address argument from pte_alloc functions Kirill A. Shutemov
2018-10-12 16:37   ` Joel Fernandes
2018-10-12 13:56 ` Anton Ivanov
2018-10-12 16:34   ` Joel Fernandes
2018-10-12 16:38     ` Julia Lawall
2018-10-12 16:46       ` Joel Fernandes
2018-10-12 18:51 ` SF Markus Elfring
2018-10-12 19:42   ` Joel Fernandes
2018-10-13  9:22     ` SF Markus Elfring

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.