linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86/sys_x86_64: fix VMA alginment for mmap file to THP
@ 2022-07-29 19:42 alexlzhu, alexlzhu
  2022-07-29 19:46 ` Matthew Wilcox
  0 siblings, 1 reply; 3+ messages in thread
From: alexlzhu, alexlzhu @ 2022-07-29 19:42 UTC (permalink / raw)
  To: linux-mm, kernel-team, linux-kernel; +Cc: alexlzhu

From: alexlzhu <alexlzhu@fb.com>

With CONFIG_READ_ONLY_THP_FOR_FS, the Linux kernel supports using THPs for
read-only mmapped files, such as shared libraries. However, on x86 the
kernel makes no attempt to actually align those mappings on 2MB boundaries,
which makes it impossible to use those THPs most of the time. This issue
applies to general file mapping THP as well as existing setups using
CONFIG_READ_ONLY_THP_FOR_FS. This is easily fixed by using the alignment
info passed to vm_unmapped_area. The problem can be seen in
/proc/PID/smaps where THPeligible is set to 0 on mappings to eligible
shared object files as shown below.

Before this patch:

7fc6a7e18000-7fc6a80cc000 r-xp 00000000 00:1e 199856
/usr/lib64/libcrypto.so.1.1.1k
Size:               2768 kB
THPeligible:    0
VmFlags: rd ex mr mw me

With this patch the library is mapped at a 2MB aligned address:

fbdfe200000-7fbdfe4b4000 r-xp 00000000 00:1e 199856
/usr/lib64/libcrypto.so.1.1.1k
Size:               2768 kB
THPeligible:    1
VmFlags: rd ex mr mw me

This fixes the alignment of VMAs for any mmap of a file that has the
rd and ex permissions and size >= 2MB. The VMA alignment and
THPeligible field for shared and anonymous memory are handled
separately and are thus not effected by this change.

Signed-off-by: alexlzhu <alexlzhu@fb.com>
---
 arch/x86/entry/vdso/vma.c    |  2 +-
 arch/x86/include/asm/elf.h   |  2 +-
 arch/x86/kernel/sys_x86_64.c | 29 ++++++++++++++++++-----------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 1000d457c332..da916040d2ba 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -337,7 +337,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
 	 * Forcibly align the final address in case we have a hardware
 	 * issue that requires alignment for performance reasons.
 	 */
-	addr = align_vdso_addr(addr);
+	addr = align_vdso_addr(addr, len);
 
 	return addr;
 }
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index cb0ff1055ab1..65a09a0e0e97 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -396,5 +396,5 @@ struct va_alignment {
 } ____cacheline_aligned;
 
 extern struct va_alignment va_align;
-extern unsigned long align_vdso_addr(unsigned long);
+extern unsigned long align_vdso_addr(unsigned long addr, unsigned long len);
 #endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 8cc653ffdccd..2506242e37aa 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -25,11 +25,18 @@
 /*
  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
  */
-static unsigned long get_align_mask(void)
+static unsigned long get_align_mask(unsigned long len)
 {
 	/* handle 32- and 64-bit case with a single conditional */
-	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) {
+		/*
+		 * Read-only file mappings can be mapped using transparent huge pages;
+		 * make sure that large mappings are 2MB aligned.
+		 */
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && len >= PMD_SIZE)
+			return PMD_SIZE - 1;
 		return 0;
+	}
 
 	if (!(current->flags & PF_RANDOMIZE))
 		return 0;
@@ -47,16 +54,16 @@ static unsigned long get_align_mask(void)
  * value before calling vm_unmapped_area() or ORed directly to the
  * address.
  */
-static unsigned long get_align_bits(void)
+static unsigned long get_align_bits(unsigned long len)
 {
-	return va_align.bits & get_align_mask();
+	return va_align.bits & get_align_mask(len);
 }
 
-unsigned long align_vdso_addr(unsigned long addr)
+unsigned long align_vdso_addr(unsigned long addr, unsigned long len)
 {
-	unsigned long align_mask = get_align_mask();
+	unsigned long align_mask = get_align_mask(len);
 	addr = (addr + align_mask) & ~align_mask;
-	return addr | get_align_bits();
+	return addr | get_align_bits(len);
 }
 
 static int __init control_va_addr_alignment(char *str)
@@ -151,8 +158,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	if (filp) {
-		info.align_mask = get_align_mask();
-		info.align_offset += get_align_bits();
+		info.align_mask = get_align_mask(len);
+		info.align_offset += get_align_bits(len);
 	}
 	return vm_unmapped_area(&info);
 }
@@ -209,8 +216,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	if (filp) {
-		info.align_mask = get_align_mask();
-		info.align_offset += get_align_bits();
+		info.align_mask = get_align_mask(len);
+		info.align_offset += get_align_bits(len);
 	}
 	addr = vm_unmapped_area(&info);
 	if (!(addr & ~PAGE_MASK))
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86/sys_x86_64: fix VMA alginment for mmap file to THP
  2022-07-29 19:42 [PATCH] x86/sys_x86_64: fix VMA alginment for mmap file to THP alexlzhu, alexlzhu
@ 2022-07-29 19:46 ` Matthew Wilcox
  2022-08-01 22:21   ` Alex Zhu (Kernel)
  0 siblings, 1 reply; 3+ messages in thread
From: Matthew Wilcox @ 2022-07-29 19:46 UTC (permalink / raw)
  To: alexlzhu; +Cc: linux-mm, kernel-team, linux-kernel

On Fri, Jul 29, 2022 at 12:42:14PM -0700, alexlzhu@devvm6390.atn0.facebook.com wrote:
> From: alexlzhu <alexlzhu@fb.com>
> 
> With CONFIG_READ_ONLY_THP_FOR_FS, the Linux kernel supports using THPs for
> read-only mmapped files, such as shared libraries. However, on x86 the
> kernel makes no attempt to actually align those mappings on 2MB boundaries,
> which makes it impossible to use those THPs most of the time. This issue
> applies to general file mapping THP as well as existing setups using
> CONFIG_READ_ONLY_THP_FOR_FS. This is easily fixed by using the alignment
> info passed to vm_unmapped_area. The problem can be seen in
> /proc/PID/smaps where THPeligible is set to 0 on mappings to eligible
> shared object files as shown below.

Can't your filesystem just use thp_get_unmapped_area() like
ext2/ext4/fuse/xfs do?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86/sys_x86_64: fix VMA alginment for mmap file to THP
  2022-07-29 19:46 ` Matthew Wilcox
@ 2022-08-01 22:21   ` Alex Zhu (Kernel)
  0 siblings, 0 replies; 3+ messages in thread
From: Alex Zhu (Kernel) @ 2022-08-01 22:21 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-mm, Kernel Team, linux-kernel


Hi Matthew,

Thanks for your suggestion. I tested thp_get_unmapped_area on btrfs and that does indeed solve the issue. Have posted a v2 including the btrfs maintainers. 

Thanks!
Alex


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-08-01 22:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-29 19:42 [PATCH] x86/sys_x86_64: fix VMA alginment for mmap file to THP alexlzhu, alexlzhu
2022-07-29 19:46 ` Matthew Wilcox
2022-08-01 22:21   ` Alex Zhu (Kernel)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).