linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs
@ 2019-12-20 14:25 Kirill A. Shutemov
  2019-12-20 14:25 ` [PATCH 1/2] thp: Fix conflict of above-47bit hint address and PMD alignment Kirill A. Shutemov
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Kirill A. Shutemov @ 2019-12-20 14:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Willhalm, Thomas, Dan Williams, Bruggeman, Otto G,
	Aneesh Kumar K . V, linux-mm, x86, linux-kernel,
	Kirill A. Shutemov

The two get_unmapped_area() implementations have to be fixed to provide
THP-friendly mappings if above-47bit hint address is specified.

Kirill A. Shutemov (2):
  thp: Fix conflict of above-47bit hint address and PMD alignment
  thp, shmem: Fix conflict of above-47bit hint address and PMD alignment

 mm/huge_memory.c | 38 ++++++++++++++++++++++++--------------
 mm/shmem.c       |  7 ++++---
 2 files changed, 28 insertions(+), 17 deletions(-)

-- 
2.24.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] thp: Fix conflict of above-47bit hint address and PMD alignment
  2019-12-20 14:25 [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Kirill A. Shutemov
@ 2019-12-20 14:25 ` Kirill A. Shutemov
  2019-12-20 14:25 ` [PATCH 2/2] thp, shmem: " Kirill A. Shutemov
  2019-12-23 22:25 ` [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Andrew Morton
  2 siblings, 0 replies; 6+ messages in thread
From: Kirill A. Shutemov @ 2019-12-20 14:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Willhalm, Thomas, Dan Williams, Bruggeman, Otto G,
	Aneesh Kumar K . V, linux-mm, x86, linux-kernel,
	Kirill A. Shutemov

Filesystems use thp_get_unmapped_area() to provide THP-friendly
mappings. For DAX in particular.

Normally, the kernel doesn't create userspace mappings above 47-bit,
even if the machine allows this (such as with 5-level paging on x86-64).
Not all user space is ready to handle wide addresses. It's known that
at least some JIT compilers use higher bits in pointers to encode their
information.

Userspace can ask for allocation from full address space by specifying
hint address (with or without MAP_FIXED) above 47-bits. If the
application doesn't need a particular address, but wants to allocate
from whole address space it can specify -1 as a hint address.

Unfortunately, this trick breaks thp_get_unmapped_area(): the function
would not try to allocate PMD-aligned area if *any* hint address
specified.

Modify the routine to handle it correctly:

 - Try to allocate the space at the specified hint address with length
   padding required for PMD alignment.
 - If failed, retry without length padding (but with the same hint
   address);
 - If the returned address matches the hint address return it.
 - Otherwise, align the address as required for THP and return.

The user specified hint address is passed down to get_unmapped_area() so
above-47bit hint address will be taken into account without breaking
alignment requirements.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace")
Reported-by: Thomas Willhalm <thomas.willhalm@intel.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
---
 mm/huge_memory.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 969653530c8f..dccadfbc9994 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -527,13 +527,13 @@ void prep_transhuge_page(struct page *page)
 	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
 }
 
-static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
+static unsigned long __thp_get_unmapped_area(struct file *filp,
+		unsigned long addr, unsigned long len,
 		loff_t off, unsigned long flags, unsigned long size)
 {
-	unsigned long addr;
 	loff_t off_end = off + len;
 	loff_t off_align = round_up(off, size);
-	unsigned long len_pad;
+	unsigned long len_pad, ret;
 
 	if (off_end <= off_align || (off_end - off_align) < size)
 		return 0;
@@ -542,30 +542,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le
 	if (len_pad < len || (off + len_pad) < off)
 		return 0;
 
-	addr = current->mm->get_unmapped_area(filp, 0, len_pad,
+	ret = current->mm->get_unmapped_area(filp, addr, len_pad,
 					      off >> PAGE_SHIFT, flags);
-	if (IS_ERR_VALUE(addr))
+
+	/*
+	 * The failure might be due to length padding. The caller will retry
+	 * without the padding.
+	 */
+	if (IS_ERR_VALUE(ret))
 		return 0;
 
-	addr += (off - addr) & (size - 1);
-	return addr;
+	/*
+	 * Do not try to align to THP boundary if allocation at the address
+	 * hint succeeds.
+	 */
+	if (ret == addr)
+		return addr;
+
+	ret += (off - ret) & (size - 1);
+	return ret;
 }
 
 unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
+	unsigned long ret;
 	loff_t off = (loff_t)pgoff << PAGE_SHIFT;
 
-	if (addr)
-		goto out;
 	if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD))
 		goto out;
 
-	addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE);
-	if (addr)
-		return addr;
-
- out:
+	ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE);
+	if (ret)
+		return ret;
+out:
 	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] thp, shmem: Fix conflict of above-47bit hint address and PMD alignment
  2019-12-20 14:25 [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Kirill A. Shutemov
  2019-12-20 14:25 ` [PATCH 1/2] thp: Fix conflict of above-47bit hint address and PMD alignment Kirill A. Shutemov
@ 2019-12-20 14:25 ` Kirill A. Shutemov
  2019-12-28  0:10   ` kbuild test robot
  2019-12-23 22:25 ` [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Andrew Morton
  2 siblings, 1 reply; 6+ messages in thread
From: Kirill A. Shutemov @ 2019-12-20 14:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Willhalm, Thomas, Dan Williams, Bruggeman, Otto G,
	Aneesh Kumar K . V, linux-mm, x86, linux-kernel,
	Kirill A. Shutemov

Shmem/tmpfs tries to provide THP-friendly mappings if huge pages are
enabled. But it doesn't work well with above-47bit hint address.

Normally, the kernel doesn't create userspace mappings above 47-bit,
even if the machine allows this (such as with 5-level paging on x86-64).
Not all user space is ready to handle wide addresses. It's known that
at least some JIT compilers use higher bits in pointers to encode their
information.

Userspace can ask for allocation from full address space by specifying
hint address (with or without MAP_FIXED) above 47-bits. If the
application doesn't need a particular address, but wants to allocate
from whole address space it can specify -1 as a hint address.

Unfortunately, this trick breaks THP alignment in shmem/tmp:
shmem_get_unmapped_area() would not try to allocate PMD-aligned area if
*any* hint address specified.

This can be fixed by requesting the aligned area if the we failed to
allocated at user-specified hint address. The request with inflated
length will also take the user-specified hint address. This way we will
not lose an allocation request from the full address space.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace")
---
 mm/shmem.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 165fa6332993..dc539482ce67 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2107,9 +2107,10 @@ unsigned long shmem_get_unmapped_area(struct file *file,
 	/*
 	 * Our priority is to support MAP_SHARED mapped hugely;
 	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
-	 * But if caller specified an address hint, respect that as before.
+	 * But if caller specified an address hint and we allocated area there
+	 * successfully, respect that as before.
 	 */
-	if (uaddr)
+	if (uaddr == addr)
 		return addr;
 
 	if (shmem_huge != SHMEM_HUGE_FORCE) {
@@ -2143,7 +2144,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
 	if (inflated_len < len)
 		return addr;
 
-	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+	inflated_addr = get_area(uaddr, 0, inflated_len, 0, flags);
 	if (IS_ERR_VALUE(inflated_addr))
 		return addr;
 	if (inflated_addr & ~PAGE_MASK)
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs
  2019-12-20 14:25 [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Kirill A. Shutemov
  2019-12-20 14:25 ` [PATCH 1/2] thp: Fix conflict of above-47bit hint address and PMD alignment Kirill A. Shutemov
  2019-12-20 14:25 ` [PATCH 2/2] thp, shmem: " Kirill A. Shutemov
@ 2019-12-23 22:25 ` Andrew Morton
  2019-12-23 23:16   ` Kirill A. Shutemov
  2 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2019-12-23 22:25 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Willhalm, Thomas, Dan Williams, Bruggeman, Otto G,
	Aneesh Kumar K . V, linux-mm, x86, linux-kernel,
	Kirill A. Shutemov

On Fri, 20 Dec 2019 17:25:46 +0300 "Kirill A. Shutemov" <kirill@shutemov.name> wrote:

> The two get_unmapped_area() implementations have to be fixed to provide
> THP-friendly mappings if above-47bit hint address is specified.
> 

Do we need a cc:stable for these?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs
  2019-12-23 22:25 ` [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Andrew Morton
@ 2019-12-23 23:16   ` Kirill A. Shutemov
  0 siblings, 0 replies; 6+ messages in thread
From: Kirill A. Shutemov @ 2019-12-23 23:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Willhalm, Thomas, Dan Williams, Bruggeman, Otto G,
	Aneesh Kumar K . V, linux-mm, x86, linux-kernel,
	Kirill A. Shutemov

On Mon, Dec 23, 2019 at 02:25:52PM -0800, Andrew Morton wrote:
> On Fri, 20 Dec 2019 17:25:46 +0300 "Kirill A. Shutemov" <kirill@shutemov.name> wrote:
> 
> > The two get_unmapped_area() implementations have to be fixed to provide
> > THP-friendly mappings if above-47bit hint address is specified.
> > 
> 
> Do we need a cc:stable for these?

Yes, please. Otherwise users that use above-47bit hint would regress on
number of PMD-mapped THPs.

But it's not urgent. It's okay to cook it up in linux-next for a while.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] thp, shmem: Fix conflict of above-47bit hint address and PMD alignment
  2019-12-20 14:25 ` [PATCH 2/2] thp, shmem: " Kirill A. Shutemov
@ 2019-12-28  0:10   ` kbuild test robot
  0 siblings, 0 replies; 6+ messages in thread
From: kbuild test robot @ 2019-12-28  0:10 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: kbuild-all, Andrew Morton, Willhalm, Thomas, Dan Williams,
	Bruggeman, Otto G, Aneesh Kumar K . V, linux-mm, x86,
	linux-kernel, Kirill A. Shutemov

Hi "Kirill,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.5-rc3 next-20191220]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Kirill-A-Shutemov/Fix-two-above-47bit-hint-address-vs-THP-bugs/20191223-221713
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 46cf053efec6a3a5f343fead837777efe8252a46
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.1-129-g341daf20-dirty
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> mm/shmem.c:2147:34: sparse: sparse: incorrect type in argument 1 (different base types)
>> mm/shmem.c:2147:34: sparse:    expected struct file *
>> mm/shmem.c:2147:34: sparse:    got unsigned long uaddr

vim +2147 mm/shmem.c

  2073	
  2074	unsigned long shmem_get_unmapped_area(struct file *file,
  2075					      unsigned long uaddr, unsigned long len,
  2076					      unsigned long pgoff, unsigned long flags)
  2077	{
  2078		unsigned long (*get_area)(struct file *,
  2079			unsigned long, unsigned long, unsigned long, unsigned long);
  2080		unsigned long addr;
  2081		unsigned long offset;
  2082		unsigned long inflated_len;
  2083		unsigned long inflated_addr;
  2084		unsigned long inflated_offset;
  2085	
  2086		if (len > TASK_SIZE)
  2087			return -ENOMEM;
  2088	
  2089		get_area = current->mm->get_unmapped_area;
  2090		addr = get_area(file, uaddr, len, pgoff, flags);
  2091	
  2092		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
  2093			return addr;
  2094		if (IS_ERR_VALUE(addr))
  2095			return addr;
  2096		if (addr & ~PAGE_MASK)
  2097			return addr;
  2098		if (addr > TASK_SIZE - len)
  2099			return addr;
  2100	
  2101		if (shmem_huge == SHMEM_HUGE_DENY)
  2102			return addr;
  2103		if (len < HPAGE_PMD_SIZE)
  2104			return addr;
  2105		if (flags & MAP_FIXED)
  2106			return addr;
  2107		/*
  2108		 * Our priority is to support MAP_SHARED mapped hugely;
  2109		 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
  2110		 * But if caller specified an address hint and we allocated area there
  2111		 * successfully, respect that as before.
  2112		 */
  2113		if (uaddr == addr)
  2114			return addr;
  2115	
  2116		if (shmem_huge != SHMEM_HUGE_FORCE) {
  2117			struct super_block *sb;
  2118	
  2119			if (file) {
  2120				VM_BUG_ON(file->f_op != &shmem_file_operations);
  2121				sb = file_inode(file)->i_sb;
  2122			} else {
  2123				/*
  2124				 * Called directly from mm/mmap.c, or drivers/char/mem.c
  2125				 * for "/dev/zero", to create a shared anonymous object.
  2126				 */
  2127				if (IS_ERR(shm_mnt))
  2128					return addr;
  2129				sb = shm_mnt->mnt_sb;
  2130			}
  2131			if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
  2132				return addr;
  2133		}
  2134	
  2135		offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
  2136		if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
  2137			return addr;
  2138		if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
  2139			return addr;
  2140	
  2141		inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
  2142		if (inflated_len > TASK_SIZE)
  2143			return addr;
  2144		if (inflated_len < len)
  2145			return addr;
  2146	
> 2147		inflated_addr = get_area(uaddr, 0, inflated_len, 0, flags);
  2148		if (IS_ERR_VALUE(inflated_addr))
  2149			return addr;
  2150		if (inflated_addr & ~PAGE_MASK)
  2151			return addr;
  2152	
  2153		inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
  2154		inflated_addr += offset - inflated_offset;
  2155		if (inflated_offset > offset)
  2156			inflated_addr += HPAGE_PMD_SIZE;
  2157	
  2158		if (inflated_addr > TASK_SIZE - len)
  2159			return addr;
  2160		return inflated_addr;
  2161	}
  2162	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-12-28  0:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-20 14:25 [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Kirill A. Shutemov
2019-12-20 14:25 ` [PATCH 1/2] thp: Fix conflict of above-47bit hint address and PMD alignment Kirill A. Shutemov
2019-12-20 14:25 ` [PATCH 2/2] thp, shmem: " Kirill A. Shutemov
2019-12-28  0:10   ` kbuild test robot
2019-12-23 22:25 ` [PATCH 0/2] Fix two above-47bit hint address vs. THP bugs Andrew Morton
2019-12-23 23:16   ` Kirill A. Shutemov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).