From: Cannon Matthews <cannonmatthews@google.com>
To: Mike Kravetz <mike.kravetz@oracle.com>,
Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>,
Michal Hocko <mhocko@kernel.org>,
David Rientjes <rientjes@google.com>,
Greg Thelen <gthelen@google.com>, Salman Qazi <sqazi@google.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Cannon Matthews <cannonmatthews@google.com>
Subject: [PATCH] mm: clear 1G pages with streaming stores on x86
Date: Fri, 6 Mar 2020 17:03:53 -0800 [thread overview]
Message-ID: <20200307010353.172991-1-cannonmatthews@google.com> (raw)
Reimplement clear_gigantic_page() to clear gigantic (1GiB) pages using
non-temporal streaming store instructions (movnti) that bypass the
cache, since an entire 1GiB region will not fit in the cache anyway.
Doing an mlock() on a 512GiB 1G-hugetlb region previously took 134
seconds on average, about 260ms/GiB, which is quite slow. Using `movnti`
and optimizing the control flow over the constituent small pages, this
can be improved by a factor of roughly 3-4x, with the 512GiB mlock()
taking only 34 seconds on average, or 67ms/GiB.
The assembly code for the clear_page_nt routine is more or less taken
directly from the output of gcc with -O3 for the function below, with
some tweaks to support arbitrary sizes and to move the memory barrier
(sfence) out of the routine and into its caller:
void clear_page_nt_64i (void *page)
{
for (int i = 0; i < GiB /sizeof(long long int); ++i)
{
_mm_stream_si64 (((long long int*)page) + i, 0);
}
sfence();
}
Tested:
Time to `mlock()` a 512GiB region on broadwell CPU
                  AVG time (s)   % imp.   ms/page
clear_page_erms   133.584        -        261
clear_page_nt     34.154         74.43%   67
An earlier version of this code was sent as an RFC patch ~July 2018
https://patchwork.kernel.org/patch/10543193/ but never merged.
Signed-off-by: Cannon Matthews <cannonmatthews@google.com>
---
MAINTAINERS | 1 +
arch/x86/Kconfig | 4 ++++
arch/x86/include/asm/page_64.h | 1 +
arch/x86/lib/Makefile | 2 +-
arch/x86/lib/clear_gigantic_page.c | 28 ++++++++++++++++++++++++++++
arch/x86/lib/clear_page_64.S | 19 +++++++++++++++++++
include/linux/mm.h | 2 ++
mm/memory.c | 2 ++
8 files changed, 58 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/lib/clear_gigantic_page.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 68eebf3650ac..efe84f085404 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7702,6 +7702,7 @@ S: Maintained
F: fs/hugetlbfs/
F: mm/hugetlb.c
F: include/linux/hugetlb.h
+F: arch/x86/lib/clear_gigantic_page.c
F: Documentation/admin-guide/mm/hugetlbpage.rst
F: Documentation/vm/hugetlbfs_reserv.rst
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index beea77046f9b..f49e7b6f6851 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -70,6 +70,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_CLEAR_GIGANTIC_PAGE if X86_64
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
@@ -290,6 +291,9 @@ config ARCH_MAY_HAVE_PC_FDC
config GENERIC_CALIBRATE_DELAY
def_bool y
+config ARCH_HAS_CLEAR_GIGANTIC_PAGE
+ bool
+
config ARCH_HAS_CPU_RELAX
def_bool y
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cff4a7b..6ea60883b6d6 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -55,6 +55,7 @@ static inline void clear_page(void *page)
}
void copy_page(void *to, void *from);
+void clear_page_nt(void *page, u64 page_size);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 5246db42de45..a620c6636210 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -56,7 +56,7 @@ endif
else
obj-y += iomap_copy_64.o
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
- lib-y += clear_page_64.o copy_page_64.o
+ lib-y += clear_page_64.o copy_page_64.o clear_gigantic_page.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o
lib-y += cmpxchg16b_emu.o
diff --git a/arch/x86/lib/clear_gigantic_page.c b/arch/x86/lib/clear_gigantic_page.c
new file mode 100644
index 000000000000..6fcb494ec9bc
--- /dev/null
+++ b/arch/x86/lib/clear_gigantic_page.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/page.h>
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+
+/* Zero @pages base pages from @page with non-temporal stores; @addr is unused. */
+void clear_gigantic_page(struct page *page, unsigned long addr,
+			 unsigned int pages)
+{
+	/* PMD_SIZE, not HPAGE_PMD_NR: the latter is a BUILD_BUG() without THP. */
+	const unsigned int step = PMD_SIZE / PAGE_SIZE;
+	void *dest = page_to_virt(page);
+	unsigned int i;
+
+	/* Sizes that are not a multiple of the 2M step are not supported. */
+	BUG_ON(pages % step != 0);
+	for (i = 0; i < pages; i += step) {
+		clear_page_nt(dest + (i * PAGE_SIZE), PMD_SIZE);
+		cond_resched();	/* once per 2M chunk */
+	}
+	/* clear_page_nt requires an `sfence` barrier. */
+	wmb();
+}
+#endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) */
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index c4c7dd115953..1224094fd863 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -50,3 +50,22 @@ SYM_FUNC_START(clear_page_erms)
ret
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
+
+/*
+ * Zero memory using non-temporal stores (movnti), bypassing the cache.
+ * No fence is issued here: the caller must `sfence` (wmb()) afterwards.
+ * %rdi - destination.
+ * %rsi - size in bytes. Must be a non-zero multiple of 8 (see loop exit).
+ */
+SYM_FUNC_START(clear_page_nt)
+ leaq (%rdi,%rsi), %rdx /* %rdx = one past the last byte to clear */
+ xorl %eax, %eax /* zeroes all of %rax, the value stored below */
+ .p2align 4,,10
+ .p2align 3
+.L2:
+ movnti %rax, (%rdi) /* 8-byte non-temporal store of zero */
+ addq $8, %rdi
+ cmpq %rdx, %rdi
+ jne .L2 /* exits only on %rdi == %rdx, hence the size rule */
+ ret
+SYM_FUNC_END(clear_page_nt)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c54fb96cb1e6..a57f9007374b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2856,6 +2856,8 @@ enum mf_action_page_type {
};
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+extern void clear_gigantic_page(struct page *page, unsigned long addr,
+ unsigned int pages);
extern void clear_huge_page(struct page *page,
unsigned long addr_hint,
unsigned int pages_per_huge_page);
diff --git a/mm/memory.c b/mm/memory.c
index e8bfdf0d9d1d..2a13bf102890 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4706,6 +4706,7 @@ static inline void process_huge_page(
}
}
+#ifndef CONFIG_ARCH_HAS_CLEAR_GIGANTIC_PAGE
static void clear_gigantic_page(struct page *page,
unsigned long addr,
unsigned int pages_per_huge_page)
@@ -4720,6 +4721,7 @@ static void clear_gigantic_page(struct page *page,
clear_user_highpage(p, addr + i * PAGE_SIZE);
}
}
+#endif /* CONFIG_ARCH_HAS_CLEAR_GIGANTIC_PAGE */
static void clear_subpage(unsigned long addr, int idx, void *arg)
{
--
2.25.1.481.gfbce0eb801-goog
next reply other threads:[~2020-03-07 1:04 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-07 1:03 Cannon Matthews [this message]
2020-03-07 15:36 ` [PATCH] mm: clear 1G pages with streaming stores on x86 kbuild test robot
2020-03-07 22:06 ` Andrew Morton
2020-03-09 0:08 ` Kirill A. Shutemov
2020-03-09 9:06 ` Michal Hocko
2020-03-09 9:35 ` Kirill A. Shutemov
2020-03-09 11:36 ` Kirill A. Shutemov
2020-03-09 12:26 ` Michal Hocko
2020-03-09 18:01 ` Mike Kravetz
2020-03-09 15:38 ` Andi Kleen
2020-03-09 18:37 ` Matthew Wilcox
2020-03-11 0:21 ` Cannon Matthews
2020-03-11 0:54 ` Kirill A. Shutemov
2020-03-11 3:35 ` Arvind Sankar
2020-03-11 8:16 ` Kirill A. Shutemov
2020-03-11 18:32 ` Arvind Sankar
2020-03-11 20:32 ` Arvind Sankar
2020-03-12 0:52 ` Kirill A. Shutemov
2020-03-31 0:40 ` Elliott, Robert (Servers)
2020-03-16 10:18 ` Michal Hocko
2020-03-16 12:19 ` Kirill A. Shutemov
2020-03-26 19:46 ` Matthew Wilcox
2020-03-11 15:07 ` David Laight
2020-03-09 15:33 ` Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200307010353.172991-1-cannonmatthews@google.com \
--to=cannonmatthews@google.com \
--cc=akpm@linux-foundation.org \
--cc=gthelen@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=mike.kravetz@oracle.com \
--cc=rientjes@google.com \
--cc=sqazi@google.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).