* Re: [PATCHv9-rebased2 28/37] shmem: get_unmapped_area align huge page
       [not found] <054e01d1c86d$c7261fd0$55725f70$@alibaba-inc.com>
@ 2016-06-17  8:06   ` Hillf Danton
  0 siblings, 0 replies; 6+ messages in thread
From: Hillf Danton @ 2016-06-17  8:06 UTC (permalink / raw)
  To: Hugh Dickins, Kirill A. Shutemov; +Cc: linux-kernel, linux-mm

> 
> +unsigned long shmem_get_unmapped_area(struct file *file,
> +				      unsigned long uaddr, unsigned long len,
> +				      unsigned long pgoff, unsigned long flags)
> +{
> +	unsigned long (*get_area)(struct file *,
> +		unsigned long, unsigned long, unsigned long, unsigned long);
> +	unsigned long addr;
> +	unsigned long offset;
> +	unsigned long inflated_len;
> +	unsigned long inflated_addr;
> +	unsigned long inflated_offset;
> +
> +	if (len > TASK_SIZE)
> +		return -ENOMEM;
> +
> +	get_area = current->mm->get_unmapped_area;
> +	addr = get_area(file, uaddr, len, pgoff, flags);
> +
> +	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
> +		return addr;
> +	if (IS_ERR_VALUE(addr))
> +		return addr;
> +	if (addr & ~PAGE_MASK)
> +		return addr;
> +	if (addr > TASK_SIZE - len)
> +		return addr;
> +
> +	if (shmem_huge == SHMEM_HUGE_DENY)
> +		return addr;
> +	if (len < HPAGE_PMD_SIZE)
> +		return addr;
> +	if (flags & MAP_FIXED)
> +		return addr;
> +	/*
> +	 * Our priority is to support MAP_SHARED mapped hugely;
> +	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
> +	 * But if caller specified an address hint, respect that as before.
> +	 */
> +	if (uaddr)
> +		return addr;
> +
> +	if (shmem_huge != SHMEM_HUGE_FORCE) {
> +		struct super_block *sb;
> +
> +		if (file) {
> +			VM_BUG_ON(file->f_op != &shmem_file_operations);
> +			sb = file_inode(file)->i_sb;
> +		} else {
> +			/*
> +			 * Called directly from mm/mmap.c, or drivers/char/mem.c
> +			 * for "/dev/zero", to create a shared anonymous object.
> +			 */
> +			if (IS_ERR(shm_mnt))
> +				return addr;
> +			sb = shm_mnt->mnt_sb;
> +		}
> +		if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
> +			return addr;

Try to ask for a larger arena only if huge pages are not disabled for
the mount (s/!=/==/)?  (A condensed sketch of these checks, with the
swap applied, follows below the quoted code.)

> +	}
> +
> +	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
> +	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
> +		return addr;
> +	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
> +		return addr;
> +
> +	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
> +	if (inflated_len > TASK_SIZE)
> +		return addr;
> +	if (inflated_len < len)
> +		return addr;
> +
> +	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
> +	if (IS_ERR_VALUE(inflated_addr))
> +		return addr;
> +	if (inflated_addr & ~PAGE_MASK)
> +		return addr;
> +
> +	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
> +	inflated_addr += offset - inflated_offset;
> +	if (inflated_offset > offset)
> +		inflated_addr += HPAGE_PMD_SIZE;
> +
> +	if (inflated_addr > TASK_SIZE - len)
> +		return addr;
> +	return inflated_addr;
> +}
> +
> 
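
Here is a condensed, standalone sketch of the shmem-specific checks
above with the s/!=/==/ applied -- illustration only, not the patch
itself; the enum and the helper are local stand-ins for the kernel's
SHMEM_HUGE_* constants and for SHMEM_SB(sb)->huge:

#include <stdbool.h>

/* Local stand-ins so this sketch compiles on its own; the real
 * SHMEM_HUGE_* constants live in mm/shmem.c. */
enum shmem_huge_setting {
        SHMEM_HUGE_NEVER,
        SHMEM_HUGE_ALWAYS,
        SHMEM_HUGE_DENY,
        SHMEM_HUGE_FORCE,
};

/*
 * Give up on PMD realignment only when huge pages are disabled for the
 * mount: shmem_huge mirrors the global sysctl, mount_huge mirrors
 * SHMEM_SB(sb)->huge.
 */
static bool worth_trying_huge_alignment(enum shmem_huge_setting shmem_huge,
                                        enum shmem_huge_setting mount_huge,
                                        unsigned long uaddr, unsigned long len,
                                        unsigned long hpage_pmd_size,
                                        bool map_fixed)
{
        if (shmem_huge == SHMEM_HUGE_DENY)
                return false;
        if (len < hpage_pmd_size || map_fixed || uaddr)
                return false;
        if (shmem_huge != SHMEM_HUGE_FORCE &&
            mount_huge == SHMEM_HUGE_NEVER)     /* the posted patch has "!=" here */
                return false;
        return true;
}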

* Re: [PATCHv9-rebased2 28/37] shmem: get_unmapped_area align huge page
  2016-06-17  8:06   ` Hillf Danton
@ 2016-06-17 11:29     ` Kirill A. Shutemov
  0 siblings, 0 replies; 6+ messages in thread
From: Kirill A. Shutemov @ 2016-06-17 11:29 UTC (permalink / raw)
  To: Hillf Danton; +Cc: Hugh Dickins, Kirill A. Shutemov, linux-kernel, linux-mm

On Fri, Jun 17, 2016 at 04:06:33PM +0800, Hillf Danton wrote:
> > 
> > +unsigned long shmem_get_unmapped_area(struct file *file,
> > +				      unsigned long uaddr, unsigned long len,
> > +				      unsigned long pgoff, unsigned long flags)
> > +{
> > +	unsigned long (*get_area)(struct file *,
> > +		unsigned long, unsigned long, unsigned long, unsigned long);
> > +	unsigned long addr;
> > +	unsigned long offset;
> > +	unsigned long inflated_len;
> > +	unsigned long inflated_addr;
> > +	unsigned long inflated_offset;
> > +
> > +	if (len > TASK_SIZE)
> > +		return -ENOMEM;
> > +
> > +	get_area = current->mm->get_unmapped_area;
> > +	addr = get_area(file, uaddr, len, pgoff, flags);
> > +
> > +	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
> > +		return addr;
> > +	if (IS_ERR_VALUE(addr))
> > +		return addr;
> > +	if (addr & ~PAGE_MASK)
> > +		return addr;
> > +	if (addr > TASK_SIZE - len)
> > +		return addr;
> > +
> > +	if (shmem_huge == SHMEM_HUGE_DENY)
> > +		return addr;
> > +	if (len < HPAGE_PMD_SIZE)
> > +		return addr;
> > +	if (flags & MAP_FIXED)
> > +		return addr;
> > +	/*
> > +	 * Our priority is to support MAP_SHARED mapped hugely;
> > +	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
> > +	 * But if caller specified an address hint, respect that as before.
> > +	 */
> > +	if (uaddr)
> > +		return addr;
> > +
> > +	if (shmem_huge != SHMEM_HUGE_FORCE) {
> > +		struct super_block *sb;
> > +
> > +		if (file) {
> > +			VM_BUG_ON(file->f_op != &shmem_file_operations);
> > +			sb = file_inode(file)->i_sb;
> > +		} else {
> > +			/*
> > +			 * Called directly from mm/mmap.c, or drivers/char/mem.c
> > +			 * for "/dev/zero", to create a shared anonymous object.
> > +			 */
> > +			if (IS_ERR(shm_mnt))
> > +				return addr;
> > +			sb = shm_mnt->mnt_sb;
> > +		}
> > +		if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
> > +			return addr;
> 
> Try to ask for a larger arena only if huge pages are not disabled for
> the mount (s/!=/==/)?

<facepalm>

I mostly test with SHMEM_HUGE_FORCE as it puts more stress on the system.

Fixup:

diff --git a/mm/shmem.c b/mm/shmem.c
index e2c6b6e8387a..3f4ebe84ef61 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1979,7 +1979,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
 				return addr;
 			sb = shm_mnt->mnt_sb;
 		}
-		if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
 			return addr;
 	}
 
-- 
 Kirill A. Shutemov

* [PATCHv9-rebased2 28/37] shmem: get_unmapped_area align huge page
  2016-06-15 20:06 ` [PATCHv9-rebased2 00/37] " Kirill A. Shutemov
@ 2016-06-15 20:06     ` Kirill A. Shutemov
  0 siblings, 0 replies; 6+ messages in thread
From: Kirill A. Shutemov @ 2016-06-15 20:06 UTC (permalink / raw)
  To: Hugh Dickins, Andrea Arcangeli, Andrew Morton
  Cc: Dave Hansen, Vlastimil Babka, Christoph Lameter, Naoya Horiguchi,
	Jerome Marchand, Yang Shi, Sasha Levin, Andres Lagar-Cavilla,
	Ning Qu, linux-kernel, linux-mm, linux-fsdevel, Ebru Akagunduz,
	Kirill A . Shutemov

From: Hugh Dickins <hughd@google.com>

Provide a shmem_get_unmapped_area method in file_operations, called
at mmap time to decide the mapping address.  It could be conditional
on CONFIG_TRANSPARENT_HUGEPAGE, but save #ifdefs in other places by
making it unconditional.

shmem_get_unmapped_area() first calls the usual mm->get_unmapped_area
(which we treat as a black box, highly dependent on architecture and
config and executable layout).  Lots of conditions, and in most cases
it just goes with the address that chose; but when our huge stars are
rightly aligned, yet that did not provide a suitable address, go back
to ask for a larger arena, within which to align the mapping suitably.

There have to be some direct calls to shmem_get_unmapped_area(),
not via the file_operations: because of the way shmem_zero_setup()
is called to create a shmem object late in the mmap sequence, when
MAP_SHARED is requested with MAP_ANONYMOUS or /dev/zero.  Though
this only matters when /proc/sys/vm/shmem_huge has been set.
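
As a rough illustration of the intended effect, a minimal user-space
sketch follows (not part of the patch; the 2MB PMD size and the numbers
are assumptions for x86-64, and whether the address actually comes back
aligned depends on /proc/sys/vm/shmem_huge or the mount's huge= option):

#include <stdio.h>
#include <sys/mman.h>

#define HPAGE_SIZE (2UL << 20)          /* assumed PMD size */

int main(void)
{
        size_t len = 8 * HPAGE_SIZE;    /* well above one huge page */

        /* MAP_ANONYMOUS|MAP_SHARED is backed by a shmem object via
         * shmem_zero_setup(), so with this patch the address search
         * goes through shmem_get_unmapped_area(). */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        printf("addr %p is %s2MB-aligned\n", p,
               ((unsigned long)p & (HPAGE_SIZE - 1)) ? "not " : "");
        munmap(p, len);
        return 0;
}

With an explicit address hint or MAP_FIXED the old behaviour is kept,
as the checks in shmem_get_unmapped_area() below spell out.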

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 drivers/char/mem.c       | 24 ++++++++++++
 include/linux/shmem_fs.h |  2 +
 ipc/shm.c                |  6 ++-
 mm/mmap.c                | 16 +++++++-
 mm/shmem.c               | 98 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 71025c2f6bbb..9656f1095c19 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -22,6 +22,7 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
+#include <linux/shmem_fs.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>
@@ -661,6 +662,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static unsigned long get_unmapped_area_zero(struct file *file,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags)
+{
+#ifdef CONFIG_MMU
+	if (flags & MAP_SHARED) {
+		/*
+		 * mmap_zero() will call shmem_zero_setup() to create a file,
+		 * so use shmem's get_unmapped_area in case it can be huge;
+		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
+		 * so as not to confuse shmem with our handle on "/dev/zero".
+		 */
+		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
+	}
+
+	/* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+#else
+	return -ENOSYS;
+#endif
+}
+
 static ssize_t write_full(struct file *file, const char __user *buf,
 			  size_t count, loff_t *ppos)
 {
@@ -768,6 +791,7 @@ static const struct file_operations zero_fops = {
 	.read_iter	= read_iter_zero,
 	.write_iter	= write_iter_zero,
 	.mmap		= mmap_zero,
+	.get_unmapped_area = get_unmapped_area_zero,
 #ifndef CONFIG_MMU
 	.mmap_capabilities = zero_mmap_capabilities,
 #endif
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 466f18c73a49..ff2de4bab61f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -50,6 +50,8 @@ extern struct file *shmem_file_setup(const char *name,
 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
 					    unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
+extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern bool shmem_mapping(struct address_space *mapping);
 extern void shmem_unlock_mapping(struct address_space *mapping);
diff --git a/ipc/shm.c b/ipc/shm.c
index 13282510bc0d..7fa5cbebbf19 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -476,13 +476,15 @@ static const struct file_operations shm_file_operations = {
 	.mmap		= shm_mmap,
 	.fsync		= shm_fsync,
 	.release	= shm_release,
-#ifndef CONFIG_MMU
 	.get_unmapped_area	= shm_get_unmapped_area,
-#endif
 	.llseek		= noop_llseek,
 	.fallocate	= shm_fallocate,
 };
 
+/*
+ * shm_file_operations_huge is now identical to shm_file_operations,
+ * but we keep it distinct for the sake of is_file_shm_hugepages().
+ */
 static const struct file_operations shm_file_operations_huge = {
 	.mmap		= shm_mmap,
 	.fsync		= shm_fsync,
diff --git a/mm/mmap.c b/mm/mmap.c
index daabef097c78..25c2b4e0fbdc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -25,6 +25,7 @@
 #include <linux/personality.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/shmem_fs.h>
 #include <linux/profile.h>
 #include <linux/export.h>
 #include <linux/mount.h>
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		return -ENOMEM;
 
 	get_area = current->mm->get_unmapped_area;
-	if (file && file->f_op->get_unmapped_area)
-		get_area = file->f_op->get_unmapped_area;
+	if (file) {
+		if (file->f_op->get_unmapped_area)
+			get_area = file->f_op->get_unmapped_area;
+	} else if (flags & MAP_SHARED) {
+		/*
+		 * mmap_region() will call shmem_zero_setup() to create a file,
+		 * so use shmem's get_unmapped_area in case it can be huge.
+		 * do_mmap_pgoff() will clear pgoff, so match alignment.
+		 */
+		pgoff = 0;
+		get_area = shmem_get_unmapped_area;
+	}
+
 	addr = get_area(file, addr, len, pgoff, flags);
 	if (IS_ERR_VALUE(addr))
 		return addr;
diff --git a/mm/shmem.c b/mm/shmem.c
index 2051e0685a43..f092d9aa129d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1513,6 +1513,94 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return ret;
 }
 
+unsigned long shmem_get_unmapped_area(struct file *file,
+				      unsigned long uaddr, unsigned long len,
+				      unsigned long pgoff, unsigned long flags)
+{
+	unsigned long (*get_area)(struct file *,
+		unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long addr;
+	unsigned long offset;
+	unsigned long inflated_len;
+	unsigned long inflated_addr;
+	unsigned long inflated_offset;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	get_area = current->mm->get_unmapped_area;
+	addr = get_area(file, uaddr, len, pgoff, flags);
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return addr;
+	if (IS_ERR_VALUE(addr))
+		return addr;
+	if (addr & ~PAGE_MASK)
+		return addr;
+	if (addr > TASK_SIZE - len)
+		return addr;
+
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return addr;
+	if (len < HPAGE_PMD_SIZE)
+		return addr;
+	if (flags & MAP_FIXED)
+		return addr;
+	/*
+	 * Our priority is to support MAP_SHARED mapped hugely;
+	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
+	 * But if caller specified an address hint, respect that as before.
+	 */
+	if (uaddr)
+		return addr;
+
+	if (shmem_huge != SHMEM_HUGE_FORCE) {
+		struct super_block *sb;
+
+		if (file) {
+			VM_BUG_ON(file->f_op != &shmem_file_operations);
+			sb = file_inode(file)->i_sb;
+		} else {
+			/*
+			 * Called directly from mm/mmap.c, or drivers/char/mem.c
+			 * for "/dev/zero", to create a shared anonymous object.
+			 */
+			if (IS_ERR(shm_mnt))
+				return addr;
+			sb = shm_mnt->mnt_sb;
+		}
+		if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+			return addr;
+	}
+
+	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
+	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
+		return addr;
+	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
+		return addr;
+
+	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
+	if (inflated_len > TASK_SIZE)
+		return addr;
+	if (inflated_len < len)
+		return addr;
+
+	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+	if (IS_ERR_VALUE(inflated_addr))
+		return addr;
+	if (inflated_addr & ~PAGE_MASK)
+		return addr;
+
+	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
+	inflated_addr += offset - inflated_offset;
+	if (inflated_offset > offset)
+		inflated_addr += HPAGE_PMD_SIZE;
+
+	if (inflated_addr > TASK_SIZE - len)
+		return addr;
+	return inflated_addr;
+}
+
 #ifdef CONFIG_NUMA
 static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
 {
@@ -3259,6 +3347,7 @@ static const struct address_space_operations shmem_aops = {
 
 static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
+	.get_unmapped_area = shmem_get_unmapped_area,
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
 	.read_iter	= shmem_file_read_iter,
@@ -3494,6 +3583,15 @@ void shmem_unlock_mapping(struct address_space *mapping)
 {
 }
 
+#ifdef CONFIG_MMU
+unsigned long shmem_get_unmapped_area(struct file *file,
+				      unsigned long addr, unsigned long len,
+				      unsigned long pgoff, unsigned long flags)
+{
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+#endif
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
-- 
2.8.1
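
For readers tracing the inflate-and-realign arithmetic in
shmem_get_unmapped_area() above, here is a standalone sketch of the same
computation with made-up numbers (a hypothetical 64-bit address, 4KB
pages and a 2MB HPAGE_PMD_SIZE are assumed):

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define HPAGE_PMD_SIZE  (2UL << 20)

int main(void)
{
        unsigned long pgoff = 0x203;                    /* example file page offset */
        unsigned long len = 8 * HPAGE_PMD_SIZE;         /* example mapping length */
        unsigned long inflated_addr = 0x7f1234501000;   /* pretend get_area() result */

        /* Offset within a huge page at which the mapping must start so
         * that file offset and virtual address share PMD alignment. */
        unsigned long offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE - 1);

        /* Over-allocate by almost one huge page, then slide forward to
         * the first address congruent to "offset" mod HPAGE_PMD_SIZE. */
        unsigned long inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
        unsigned long inflated_offset = inflated_addr & (HPAGE_PMD_SIZE - 1);
        unsigned long addr = inflated_addr + offset - inflated_offset;

        if (inflated_offset > offset)
                addr += HPAGE_PMD_SIZE;

        printf("want offset 0x%lx, arena at 0x%lx (len 0x%lx) -> map at 0x%lx\n",
               offset, inflated_addr, inflated_len, addr);
        /* addr is congruent to offset modulo HPAGE_PMD_SIZE and still lies
         * within [inflated_addr, inflated_addr + inflated_len - len]. */
        return 0;
}

The extra HPAGE_PMD_SIZE - PAGE_SIZE of slack is what guarantees the
realigned start still leaves room for the whole len.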

end of thread

Thread overview: 6+ messages
     [not found] <054e01d1c86d$c7261fd0$55725f70$@alibaba-inc.com>
2016-06-17  8:06 ` [PATCHv9-rebased2 28/37] shmem: get_unmapped_area align huge page Hillf Danton
2016-06-17  8:06   ` Hillf Danton
2016-06-17 11:29   ` Kirill A. Shutemov
2016-06-17 11:29     ` Kirill A. Shutemov
2016-06-06 14:06 [PATCHv9 00/32] THP-enabled tmpfs/shmem using compound pages Kirill A. Shutemov
2016-06-15 20:06 ` [PATCHv9-rebased2 00/37] " Kirill A. Shutemov
2016-06-15 20:06   ` [PATCHv9-rebased2 28/37] shmem: get_unmapped_area align huge page Kirill A. Shutemov
2016-06-15 20:06     ` Kirill A. Shutemov
