linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: Add MREMAP_DONTUNMAP to mremap().
@ 2020-01-23  1:46 Brian Geffon
  2020-01-23  3:02 ` Andy Lutomirski
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Brian Geffon @ 2020-01-23  1:46 UTC (permalink / raw)
  To: linux-mm
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, Brian Geffon,
	Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra,
	linux-kernel, linux-api, Yu Zhao, Jesse Barnes

MREMAP_DONTUNMAP is an additional flag that can be used with
MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2)
would then tear down the old vma so subsequent accesses to the vma
cause a segfault. However, with this new flag it will keep the old
vma with zapping PTEs so any access to the old VMA after that point
will result in a pagefault.

This feature will find a use in ChromeOS along with userfaultfd.
Specifically we will want to register a VMA with userfaultfd and then
pull it out from under a running process. By using MREMAP_DONTUNMAP we
don't have to worry about mprotecting and then potentially racing with
VMA permission changes from a running process.

This feature also has a use case in Android, Lokesh Gidra has said
that "As part of using userfaultfd for GC, We'll have to move the physical
pages of the java heap to a separate location. For this purpose mremap
will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
heap, its virtual mapping will be removed as well. Therefore, we'll
require performing mmap immediately after. This is not only time consuming
but also opens a time window where a native thread may call mmap and
reserve the java heap's address range for its own usage. This flag
solves the problem."

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 include/uapi/linux/mman.h |  5 +++--
 mm/mremap.c               | 37 ++++++++++++++++++++++++++++++-------
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index fc1a64c3447b..923cc162609c 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
 
-#define MREMAP_MAYMOVE	1
-#define MREMAP_FIXED	2
+#define MREMAP_MAYMOVE		1
+#define MREMAP_FIXED		2
+#define MREMAP_DONTUNMAP	4
 
 #define OVERCOMMIT_GUESS		0
 #define OVERCOMMIT_ALWAYS		1
diff --git a/mm/mremap.c b/mm/mremap.c
index 122938dcec15..bf97c3eb538b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr,
-		bool *locked, struct vm_userfaultfd_ctx *uf,
-		struct list_head *uf_unmap)
+		bool *locked, unsigned long flags,
+		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma;
@@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn_moved(vma);
 
+	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
+		if (vm_flags & VM_ACCOUNT)
+			vma->vm_flags |= VM_ACCOUNT;
+
+		goto out;
+	}
+
 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
@@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 			vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
+out:
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		*locked = true;
@@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
-		struct vm_userfaultfd_ctx *uf,
+		unsigned long flags, struct vm_userfaultfd_ctx *uf,
 		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
@@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		old_len = new_len;
 	}
 
+	/*
+	 * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
+	 * check that we can expand by old_len and vma_to_resize will handle
+	 * the vma growing.
+	 */
+	if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
+				vma->vm_flags, old_len >> PAGE_SHIFT))) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	vma = vma_to_resize(addr, old_len, new_len, &charged);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
@@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (IS_ERR_VALUE(ret))
 		goto out1;
 
-	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
 		       uf_unmap);
 	if (!(offset_in_page(ret)))
 		goto out;
@@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	addr = untagged_addr(addr);
 	new_addr = untagged_addr(new_addr);
 
-	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
 		return ret;
 
 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 		return ret;
 
+	if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED))
+		return ret;
+
 	if (offset_in_page(addr))
 		return ret;
 
@@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
 	if (flags & MREMAP_FIXED) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap_early, &uf_unmap);
+				&locked, flags, &uf, &uf_unmap_early,
+				&uf_unmap);
 		goto out;
 	}
 
@@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 		}
 
 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
-			       &locked, &uf, &uf_unmap);
+			       &locked, flags, &uf, &uf_unmap);
 	}
 out:
 	if (offset_in_page(ret)) {
-- 
2.25.0.341.g760bfbb309-goog


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-23  1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon
@ 2020-01-23  3:02 ` Andy Lutomirski
  2020-01-23 19:03   ` Brian Geffon
  2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: Andy Lutomirski @ 2020-01-23  3:02 UTC (permalink / raw)
  To: Brian Geffon
  Cc: linux-mm, Andrew Morton, Michael S . Tsirkin, Arnd Bergmann,
	Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra,
	linux-kernel, linux-api, Yu Zhao, Jesse Barnes



> On Jan 22, 2020, at 5:46 PM, Brian Geffon <bgeffon@google.com> wrote:
> 
> MREMAP_DONTUNMAP is an additional flag that can be used with
> MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2)
> would then tear down the old vma so subsequent accesses to the vma
> cause a segfault. However, with this new flag it will keep the old
> vma with zapping PTEs so any access to the old VMA after that point
> will result in a pagefault.

This needs a vastly better description. Perhaps:

When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set, the source mapping will not be removed. Instead it will be cleared as if a brand new anonymous, private mapping had been created atomically as part of the mremap() call.  If a userfaultfd was watching the source, it will continue to watch the new mapping.  For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail.

Or is it something else?

> 
> This feature will find a use in ChromeOS along with userfaultfd.
> Specifically we will want to register a VMA with userfaultfd and then
> pull it out from under a running process. By using MREMAP_DONTUNMAP we
> don't have to worry about mprotecting and then potentially racing with
> VMA permission changes from a running process.

Does this mean you yank it out but you want to replace it simultaneously?

> 
> This feature also has a use case in Android, Lokesh Gidra has said
> that "As part of using userfaultfd for GC, We'll have to move the physical
> pages of the java heap to a separate location. For this purpose mremap
> will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> heap, its virtual mapping will be removed as well. Therefore, we'll
> require performing mmap immediately after. This is not only time consuming
> but also opens a time window where a native thread may call mmap and
> reserve the java heap's address range for its own usage. This flag
> solves the problem."

Cute.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-23  3:02 ` Andy Lutomirski
@ 2020-01-23 19:03   ` Brian Geffon
  0 siblings, 0 replies; 21+ messages in thread
From: Brian Geffon @ 2020-01-23 19:03 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-mm, Andrew Morton, Michael S . Tsirkin, Arnd Bergmann,
	Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra, LKML,
	linux-api, Yu Zhao, Jesse Barnes

Andy,

Thanks, yes, that's a much clearer description of the feature. I'll
make sure to update the description with subsequent patches and with
later man page updates.

Brian

On Wed, Jan 22, 2020 at 7:02 PM Andy Lutomirski <luto@amacapital.net> wrote:
>
>
>
> > On Jan 22, 2020, at 5:46 PM, Brian Geffon <bgeffon@google.com> wrote:
> >
> > MREMAP_DONTUNMAP is an additional flag that can be used with
> > MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2)
> > would then tear down the old vma so subsequent accesses to the vma
> > cause a segfault. However, with this new flag it will keep the old
> > vma with zapping PTEs so any access to the old VMA after that point
> > will result in a pagefault.
>
> This needs a vastly better description. Perhaps:
>
> When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set, the source mapping will not be removed. Instead it will be cleared as if a brand new anonymous, private mapping had been created atomically as part of the mremap() call.  If a userfaultfd was watching the source, it will continue to watch the new mapping.  For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail.
>
> Or is it something else?
>
> >
> > This feature will find a use in ChromeOS along with userfaultfd.
> > Specifically we will want to register a VMA with userfaultfd and then
> > pull it out from under a running process. By using MREMAP_DONTUNMAP we
> > don't have to worry about mprotecting and then potentially racing with
> > VMA permission changes from a running process.
>
> Does this mean you yank it out but you want to replace it simultaneously?
>
> >
> > This feature also has a use case in Android, Lokesh Gidra has said
> > that "As part of using userfaultfd for GC, We'll have to move the physical
> > pages of the java heap to a separate location. For this purpose mremap
> > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> > heap, its virtual mapping will be removed as well. Therefore, we'll
> > require performing mmap immediately after. This is not only time consuming
> > but also opens a time window where a native thread may call mmap and
> > reserve the java heap's address range for its own usage. This flag
> > solves the problem."
>
> Cute.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-23  1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon
  2020-01-23  3:02 ` Andy Lutomirski
@ 2020-01-24 19:06 ` Brian Geffon
  2020-01-26  5:16   ` Nathan Chancellor
                     ` (2 more replies)
  2020-01-27  4:46 ` [PATCH] " Dan Carpenter
  2020-01-27  5:30 ` [PATCH v3] " Brian Geffon
  3 siblings, 3 replies; 21+ messages in thread
From: Brian Geffon @ 2020-01-24 19:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Michael S . Tsirkin, Brian Geffon, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
set, the source mapping will not be removed. Instead it will be
cleared as if a brand new anonymous, private mapping had been created
atomically as part of the mremap() call.  If a userfaultfd was watching
the source, it will continue to watch the new mapping.  For a mapping
that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
also used. The final result is two equally sized VMAs where the
destination contains the PTEs of the source.

We hope to use this in Chrome OS where with userfaultfd we could write
an anonymous mapping to disk without having to STOP the process or worry
about VMA permission changes.

This feature also has a use case in Android, Lokesh Gidra has said
that "As part of using userfaultfd for GC, We'll have to move the physical
pages of the java heap to a separate location. For this purpose mremap
will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
heap, its virtual mapping will be removed as well. Therefore, we'll
require performing mmap immediately after. This is not only time consuming
but also opens a time window where a native thread may call mmap and
reserve the java heap's address range for its own usage. This flag
solves the problem."

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 include/uapi/linux/mman.h |  5 +++--
 mm/mremap.c               | 37 ++++++++++++++++++++++++++++++-------
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index fc1a64c3447b..923cc162609c 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
 
-#define MREMAP_MAYMOVE	1
-#define MREMAP_FIXED	2
+#define MREMAP_MAYMOVE		1
+#define MREMAP_FIXED		2
+#define MREMAP_DONTUNMAP	4
 
 #define OVERCOMMIT_GUESS		0
 #define OVERCOMMIT_ALWAYS		1
diff --git a/mm/mremap.c b/mm/mremap.c
index 122938dcec15..bf97c3eb538b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr,
-		bool *locked, struct vm_userfaultfd_ctx *uf,
-		struct list_head *uf_unmap)
+		bool *locked, unsigned long flags,
+		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma;
@@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn_moved(vma);
 
+	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
+		if (vm_flags & VM_ACCOUNT)
+			vma->vm_flags |= VM_ACCOUNT;
+
+		goto out;
+	}
+
 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
@@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 			vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
+out:
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		*locked = true;
@@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
-		struct vm_userfaultfd_ctx *uf,
+		unsigned long flags, struct vm_userfaultfd_ctx *uf,
 		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
@@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		old_len = new_len;
 	}
 
+	/*
+	 * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
+	 * check that we can expand by old_len and vma_to_resize will handle
+	 * the vma growing.
+	 */
+	if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
+				vma->vm_flags, old_len >> PAGE_SHIFT))) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	vma = vma_to_resize(addr, old_len, new_len, &charged);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
@@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (IS_ERR_VALUE(ret))
 		goto out1;
 
-	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
 		       uf_unmap);
 	if (!(offset_in_page(ret)))
 		goto out;
@@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	addr = untagged_addr(addr);
 	new_addr = untagged_addr(new_addr);
 
-	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
 		return ret;
 
 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 		return ret;
 
+	if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED))
+		return ret;
+
 	if (offset_in_page(addr))
 		return ret;
 
@@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
 	if (flags & MREMAP_FIXED) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap_early, &uf_unmap);
+				&locked, flags, &uf, &uf_unmap_early,
+				&uf_unmap);
 		goto out;
 	}
 
@@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 		}
 
 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
-			       &locked, &uf, &uf_unmap);
+			       &locked, flags, &uf, &uf_unmap);
 	}
 out:
 	if (offset_in_page(ret)) {
-- 
2.25.0.341.g760bfbb309-goog


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
@ 2020-01-26  5:16   ` Nathan Chancellor
  2020-01-27  2:21     ` Brian Geffon
  2020-01-26 22:06   ` Kirill A. Shutemov
  2020-01-27 10:13   ` Florian Weimer
  2 siblings, 1 reply; 21+ messages in thread
From: Nathan Chancellor @ 2020-01-26  5:16 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes,
	clang-built-linux

Hi Brian,

On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote:
> When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> set, the source mapping will not be removed. Instead it will be
> cleared as if a brand new anonymous, private mapping had been created
> atomically as part of the mremap() call.  If a userfaultfd was watching
> the source, it will continue to watch the new mapping.  For a mapping
> that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
> also used. The final result is two equally sized VMAs where the
> destination contains the PTEs of the source.
> 
> We hope to use this in Chrome OS where with userfaultfd we could write
> an anonymous mapping to disk without having to STOP the process or worry
> about VMA permission changes.
> 
> This feature also has a use case in Android, Lokesh Gidra has said
> that "As part of using userfaultfd for GC, We'll have to move the physical
> pages of the java heap to a separate location. For this purpose mremap
> will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> heap, its virtual mapping will be removed as well. Therefore, we'll
> require performing mmap immediately after. This is not only time consuming
> but also opens a time window where a native thread may call mmap and
> reserve the java heap's address range for its own usage. This flag
> solves the problem."
> 
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
>  include/uapi/linux/mman.h |  5 +++--
>  mm/mremap.c               | 37 ++++++++++++++++++++++++++++++-------
>  2 files changed, 33 insertions(+), 9 deletions(-)
> 
> diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
> index fc1a64c3447b..923cc162609c 100644
> --- a/include/uapi/linux/mman.h
> +++ b/include/uapi/linux/mman.h
> @@ -5,8 +5,9 @@
>  #include <asm/mman.h>
>  #include <asm-generic/hugetlb_encode.h>
>  
> -#define MREMAP_MAYMOVE	1
> -#define MREMAP_FIXED	2
> +#define MREMAP_MAYMOVE		1
> +#define MREMAP_FIXED		2
> +#define MREMAP_DONTUNMAP	4
>  
>  #define OVERCOMMIT_GUESS		0
>  #define OVERCOMMIT_ALWAYS		1
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 122938dcec15..bf97c3eb538b 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>  static unsigned long move_vma(struct vm_area_struct *vma,
>  		unsigned long old_addr, unsigned long old_len,
>  		unsigned long new_len, unsigned long new_addr,
> -		bool *locked, struct vm_userfaultfd_ctx *uf,
> -		struct list_head *uf_unmap)
> +		bool *locked, unsigned long flags,
> +		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
>  {
>  	struct mm_struct *mm = vma->vm_mm;
>  	struct vm_area_struct *new_vma;
> @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma,
>  	if (unlikely(vma->vm_flags & VM_PFNMAP))
>  		untrack_pfn_moved(vma);
>  
> +	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
> +		if (vm_flags & VM_ACCOUNT)
> +			vma->vm_flags |= VM_ACCOUNT;
> +
> +		goto out;
> +	}
> +
>  	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
>  		/* OOM: unable to split vma, just get accounts right */
>  		vm_unacct_memory(excess >> PAGE_SHIFT);
> @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
>  			vma->vm_next->vm_flags |= VM_ACCOUNT;
>  	}
>  
> +out:
>  	if (vm_flags & VM_LOCKED) {
>  		mm->locked_vm += new_len >> PAGE_SHIFT;
>  		*locked = true;
> @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
>  
>  static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
>  		unsigned long new_addr, unsigned long new_len, bool *locked,
> -		struct vm_userfaultfd_ctx *uf,
> +		unsigned long flags, struct vm_userfaultfd_ctx *uf,
>  		struct list_head *uf_unmap_early,
>  		struct list_head *uf_unmap)
>  {
> @@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
>  		old_len = new_len;
>  	}
>  
> +	/*
> +	 * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
> +	 * check that we can expand by old_len and vma_to_resize will handle
> +	 * the vma growing.
> +	 */
> +	if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
> +				vma->vm_flags, old_len >> PAGE_SHIFT))) {

We received a Clang build report that vma is used uninitialized here
(they aren't being publicly sent to LKML due to GCC vs Clang
warning/error overlap):

https://groups.google.com/d/msg/clang-built-linux/gE5wRaeHdSI/xVA0MBQVEgAJ

Sure enough, vma is initialized first in the next block. Not sure if
this section should be moved below that initialization or if something
else should be done to resolve it but that dereference will obviously be
fatal.

Cheers,
Nathan

> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
>  	vma = vma_to_resize(addr, old_len, new_len, &charged);
>  	if (IS_ERR(vma)) {
>  		ret = PTR_ERR(vma);
> @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
>  	if (IS_ERR_VALUE(ret))
>  		goto out1;
>  
> -	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
> +	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
>  		       uf_unmap);
>  	if (!(offset_in_page(ret)))
>  		goto out;
> @@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
>  	addr = untagged_addr(addr);
>  	new_addr = untagged_addr(new_addr);
>  
> -	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
> +	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
>  		return ret;
>  
>  	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
>  		return ret;
>  
> +	if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED))
> +		return ret;
> +
>  	if (offset_in_page(addr))
>  		return ret;
>  
> @@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
>  
>  	if (flags & MREMAP_FIXED) {
>  		ret = mremap_to(addr, old_len, new_addr, new_len,
> -				&locked, &uf, &uf_unmap_early, &uf_unmap);
> +				&locked, flags, &uf, &uf_unmap_early,
> +				&uf_unmap);
>  		goto out;
>  	}
>  
> @@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
>  		}
>  
>  		ret = move_vma(vma, addr, old_len, new_len, new_addr,
> -			       &locked, &uf, &uf_unmap);
> +			       &locked, flags, &uf, &uf_unmap);
>  	}
>  out:
>  	if (offset_in_page(ret)) {
> -- 
> 2.25.0.341.g760bfbb309-goog
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
  2020-01-26  5:16   ` Nathan Chancellor
@ 2020-01-26 22:06   ` Kirill A. Shutemov
  2020-01-28  1:35     ` Brian Geffon
  2020-01-27 10:13   ` Florian Weimer
  2 siblings, 1 reply; 21+ messages in thread
From: Kirill A. Shutemov @ 2020-01-26 22:06 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote:
> When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> set, the source mapping will not be removed. Instead it will be
> cleared as if a brand new anonymous, private mapping had been created
> atomically as part of the mremap() call.  If a userfaultfd was watching
> the source, it will continue to watch the new mapping.  For a mapping
> that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
> also used.

Implies? From code it looks like it requires MREMAP_FIXED. And
MREMAP_FIXED requires MREMAP_MAYMOVE. That's strange flag chaining.
I don't really see need in such dependencies. It maybe indeed implied, not
requied.

> The final result is two equally sized VMAs where the
> destination contains the PTEs of the source.

Could you clarify rmap situation here? How the rmap hierarchy will look
like before and after the operation. Can the new VMA merge with the old
one? Sounds fishy to me.

> We hope to use this in Chrome OS where with userfaultfd we could write
> an anonymous mapping to disk without having to STOP the process or worry
> about VMA permission changes.
> 
> This feature also has a use case in Android, Lokesh Gidra has said
> that "As part of using userfaultfd for GC, We'll have to move the physical
> pages of the java heap to a separate location. For this purpose mremap
> will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> heap, its virtual mapping will be removed as well. Therefore, we'll
> require performing mmap immediately after. This is not only time consuming
> but also opens a time window where a native thread may call mmap and
> reserve the java heap's address range for its own usage. This flag
> solves the problem."

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-26  5:16   ` Nathan Chancellor
@ 2020-01-27  2:21     ` Brian Geffon
  0 siblings, 0 replies; 21+ messages in thread
From: Brian Geffon @ 2020-01-27  2:21 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes,
	clang-built-linux

Hi Nathan,
Thank you! That was an oversight on my part. I'll address it in the next patch.

Brian


On Sat, Jan 25, 2020 at 9:16 PM Nathan Chancellor
<natechancellor@gmail.com> wrote:
>
> Hi Brian,
>
> On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote:
> > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> > set, the source mapping will not be removed. Instead it will be
> > cleared as if a brand new anonymous, private mapping had been created
> > atomically as part of the mremap() call.  If a userfaultfd was watching
> > the source, it will continue to watch the new mapping.  For a mapping
> > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
> > also used. The final result is two equally sized VMAs where the
> > destination contains the PTEs of the source.
> >
> > We hope to use this in Chrome OS where with userfaultfd we could write
> > an anonymous mapping to disk without having to STOP the process or worry
> > about VMA permission changes.
> >
> > This feature also has a use case in Android, Lokesh Gidra has said
> > that "As part of using userfaultfd for GC, We'll have to move the physical
> > pages of the java heap to a separate location. For this purpose mremap
> > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> > heap, its virtual mapping will be removed as well. Therefore, we'll
> > require performing mmap immediately after. This is not only time consuming
> > but also opens a time window where a native thread may call mmap and
> > reserve the java heap's address range for its own usage. This flag
> > solves the problem."
> >
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > ---
> >  include/uapi/linux/mman.h |  5 +++--
> >  mm/mremap.c               | 37 ++++++++++++++++++++++++++++++-------
> >  2 files changed, 33 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
> > index fc1a64c3447b..923cc162609c 100644
> > --- a/include/uapi/linux/mman.h
> > +++ b/include/uapi/linux/mman.h
> > @@ -5,8 +5,9 @@
> >  #include <asm/mman.h>
> >  #include <asm-generic/hugetlb_encode.h>
> >
> > -#define MREMAP_MAYMOVE       1
> > -#define MREMAP_FIXED 2
> > +#define MREMAP_MAYMOVE               1
> > +#define MREMAP_FIXED         2
> > +#define MREMAP_DONTUNMAP     4
> >
> >  #define OVERCOMMIT_GUESS             0
> >  #define OVERCOMMIT_ALWAYS            1
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index 122938dcec15..bf97c3eb538b 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
> >  static unsigned long move_vma(struct vm_area_struct *vma,
> >               unsigned long old_addr, unsigned long old_len,
> >               unsigned long new_len, unsigned long new_addr,
> > -             bool *locked, struct vm_userfaultfd_ctx *uf,
> > -             struct list_head *uf_unmap)
> > +             bool *locked, unsigned long flags,
> > +             struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
> >  {
> >       struct mm_struct *mm = vma->vm_mm;
> >       struct vm_area_struct *new_vma;
> > @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma,
> >       if (unlikely(vma->vm_flags & VM_PFNMAP))
> >               untrack_pfn_moved(vma);
> >
> > +     if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
> > +             if (vm_flags & VM_ACCOUNT)
> > +                     vma->vm_flags |= VM_ACCOUNT;
> > +
> > +             goto out;
> > +     }
> > +
> >       if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
> >               /* OOM: unable to split vma, just get accounts right */
> >               vm_unacct_memory(excess >> PAGE_SHIFT);
> > @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
> >                       vma->vm_next->vm_flags |= VM_ACCOUNT;
> >       }
> >
> > +out:
> >       if (vm_flags & VM_LOCKED) {
> >               mm->locked_vm += new_len >> PAGE_SHIFT;
> >               *locked = true;
> > @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
> >
> >  static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
> >               unsigned long new_addr, unsigned long new_len, bool *locked,
> > -             struct vm_userfaultfd_ctx *uf,
> > +             unsigned long flags, struct vm_userfaultfd_ctx *uf,
> >               struct list_head *uf_unmap_early,
> >               struct list_head *uf_unmap)
> >  {
> > @@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
> >               old_len = new_len;
> >       }
> >
> > +     /*
> > +      * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
> > +      * check that we can expand by old_len and vma_to_resize will handle
> > +      * the vma growing.
> > +      */
> > +     if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
> > +                             vma->vm_flags, old_len >> PAGE_SHIFT))) {
>
> We received a Clang build report that vma is used uninitialized here
> (they aren't being publicly sent to LKML due to GCC vs Clang
> warning/error overlap):
>
> https://groups.google.com/d/msg/clang-built-linux/gE5wRaeHdSI/xVA0MBQVEgAJ
>
> Sure enough, vma is initialized first in the next block. Not sure if
> this section should be moved below that initialization or if something
> else should be done to resolve it but that dereference will obviously be
> fatal.
>
> Cheers,
> Nathan
>
> > +             ret = -ENOMEM;
> > +             goto out;
> > +     }
> > +
> >       vma = vma_to_resize(addr, old_len, new_len, &charged);
> >       if (IS_ERR(vma)) {
> >               ret = PTR_ERR(vma);
> > @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
> >       if (IS_ERR_VALUE(ret))
> >               goto out1;
> >
> > -     ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
> > +     ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
> >                      uf_unmap);
> >       if (!(offset_in_page(ret)))
> >               goto out;
> > @@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> >       addr = untagged_addr(addr);
> >       new_addr = untagged_addr(new_addr);
> >
> > -     if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
> > +     if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> >               return ret;
> >
> >       if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
> >               return ret;
> >
> > +     if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED))
> > +             return ret;
> > +
> >       if (offset_in_page(addr))
> >               return ret;
> >
> > @@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> >
> >       if (flags & MREMAP_FIXED) {
> >               ret = mremap_to(addr, old_len, new_addr, new_len,
> > -                             &locked, &uf, &uf_unmap_early, &uf_unmap);
> > +                             &locked, flags, &uf, &uf_unmap_early,
> > +                             &uf_unmap);
> >               goto out;
> >       }
> >
> > @@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> >               }
> >
> >               ret = move_vma(vma, addr, old_len, new_len, new_addr,
> > -                            &locked, &uf, &uf_unmap);
> > +                            &locked, flags, &uf, &uf_unmap);
> >       }
> >  out:
> >       if (offset_in_page(ret)) {
> > --
> > 2.25.0.341.g760bfbb309-goog
> >

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-23  1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon
  2020-01-23  3:02 ` Andy Lutomirski
  2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
@ 2020-01-27  4:46 ` Dan Carpenter
  2020-01-27  5:30 ` [PATCH v3] " Brian Geffon
  3 siblings, 0 replies; 21+ messages in thread
From: Dan Carpenter @ 2020-01-27  4:46 UTC (permalink / raw)
  To: kbuild, Brian Geffon
  Cc: kbuild-all, linux-mm, Andrew Morton, Michael S . Tsirkin,
	Arnd Bergmann, Brian Geffon, Sonny Rao, Minchan Kim,
	Joel Fernandes, Lokesh Gidra, linux-kernel, linux-api, Yu Zhao,
	Jesse Barnes

Hi Brian,

url:    https://github.com/0day-ci/linux/commits/Brian-Geffon/mm-Add-MREMAP_DONTUNMAP-to-mremap/20200125-013342
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 4703d9119972bf586d2cca76ec6438f819ffa30e

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
mm/mremap.c:561 mremap_to() error: potentially dereferencing uninitialized 'vma'.

# https://github.com/0day-ci/linux/commit/98663ca05501623c3da7f0f30be8ba7d632cf010
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 98663ca05501623c3da7f0f30be8ba7d632cf010
vim +/vma +561 mm/mremap.c

81909b842107ef Michel Lespinasse  2013-02-22  506  static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
72f87654c69690 Pavel Emelyanov    2017-02-22  507  		unsigned long new_addr, unsigned long new_len, bool *locked,
98663ca0550162 Brian Geffon       2020-01-22  508  		unsigned long flags, struct vm_userfaultfd_ctx *uf,
b22823719302e8 Mike Rapoport      2017-08-02  509  		struct list_head *uf_unmap_early,
897ab3e0c49e24 Mike Rapoport      2017-02-24  510  		struct list_head *uf_unmap)
ecc1a8993751de Al Viro            2009-11-24  511  {
ecc1a8993751de Al Viro            2009-11-24  512  	struct mm_struct *mm = current->mm;
ecc1a8993751de Al Viro            2009-11-24  513  	struct vm_area_struct *vma;
ecc1a8993751de Al Viro            2009-11-24  514  	unsigned long ret = -EINVAL;
ecc1a8993751de Al Viro            2009-11-24  515  	unsigned long charged = 0;
097eed103862f9 Al Viro            2009-11-24  516  	unsigned long map_flags;
ecc1a8993751de Al Viro            2009-11-24  517  
f19cb115a25f3f Alexander Kuleshov 2015-11-05  518  	if (offset_in_page(new_addr))
ecc1a8993751de Al Viro            2009-11-24  519  		goto out;
ecc1a8993751de Al Viro            2009-11-24  520  
ecc1a8993751de Al Viro            2009-11-24  521  	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
ecc1a8993751de Al Viro            2009-11-24  522  		goto out;
ecc1a8993751de Al Viro            2009-11-24  523  
9943242ca46814 Oleg Nesterov      2015-09-04  524  	/* Ensure the old/new locations do not overlap */
9943242ca46814 Oleg Nesterov      2015-09-04  525  	if (addr + old_len > new_addr && new_addr + new_len > addr)
ecc1a8993751de Al Viro            2009-11-24  526  		goto out;
ecc1a8993751de Al Viro            2009-11-24  527  
ea2c3f6f554561 Oscar Salvador     2019-03-05  528  	/*
ea2c3f6f554561 Oscar Salvador     2019-03-05  529  	 * move_vma() need us to stay 4 maps below the threshold, otherwise
ea2c3f6f554561 Oscar Salvador     2019-03-05  530  	 * it will bail out at the very beginning.
ea2c3f6f554561 Oscar Salvador     2019-03-05  531  	 * That is a problem if we have already unmaped the regions here
ea2c3f6f554561 Oscar Salvador     2019-03-05  532  	 * (new_addr, and old_addr), because userspace will not know the
ea2c3f6f554561 Oscar Salvador     2019-03-05  533  	 * state of the vma's after it gets -ENOMEM.
ea2c3f6f554561 Oscar Salvador     2019-03-05  534  	 * So, to avoid such scenario we can pre-compute if the whole
ea2c3f6f554561 Oscar Salvador     2019-03-05  535  	 * operation has high chances to success map-wise.
ea2c3f6f554561 Oscar Salvador     2019-03-05  536  	 * Worst-scenario case is when both vma's (new_addr and old_addr) get
ea2c3f6f554561 Oscar Salvador     2019-03-05  537  	 * split in 3 before unmaping it.
ea2c3f6f554561 Oscar Salvador     2019-03-05  538  	 * That means 2 more maps (1 for each) to the ones we already hold.
ea2c3f6f554561 Oscar Salvador     2019-03-05  539  	 * Check whether current map count plus 2 still leads us to 4 maps below
ea2c3f6f554561 Oscar Salvador     2019-03-05  540  	 * the threshold, otherwise return -ENOMEM here to be more safe.
ea2c3f6f554561 Oscar Salvador     2019-03-05  541  	 */
ea2c3f6f554561 Oscar Salvador     2019-03-05  542  	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
ea2c3f6f554561 Oscar Salvador     2019-03-05  543  		return -ENOMEM;
ea2c3f6f554561 Oscar Salvador     2019-03-05  544  
b22823719302e8 Mike Rapoport      2017-08-02  545  	ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
ecc1a8993751de Al Viro            2009-11-24  546  	if (ret)
ecc1a8993751de Al Viro            2009-11-24  547  		goto out;
ecc1a8993751de Al Viro            2009-11-24  548  
ecc1a8993751de Al Viro            2009-11-24  549  	if (old_len >= new_len) {
897ab3e0c49e24 Mike Rapoport      2017-02-24  550  		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
ecc1a8993751de Al Viro            2009-11-24  551  		if (ret && old_len != new_len)
ecc1a8993751de Al Viro            2009-11-24  552  			goto out;
ecc1a8993751de Al Viro            2009-11-24  553  		old_len = new_len;
ecc1a8993751de Al Viro            2009-11-24  554  	}
ecc1a8993751de Al Viro            2009-11-24  555  
98663ca0550162 Brian Geffon       2020-01-22  556  	/*
98663ca0550162 Brian Geffon       2020-01-22  557  	 * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
98663ca0550162 Brian Geffon       2020-01-22  558  	 * check that we can expand by old_len and vma_to_resize will handle
98663ca0550162 Brian Geffon       2020-01-22  559  	 * the vma growing.
98663ca0550162 Brian Geffon       2020-01-22  560  	 */
98663ca0550162 Brian Geffon       2020-01-22 @561  	if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
98663ca0550162 Brian Geffon       2020-01-22  562  				vma->vm_flags, old_len >> PAGE_SHIFT))) {
                                                                                ^^^^^^^^^^^^^

98663ca0550162 Brian Geffon       2020-01-22  563  		ret = -ENOMEM;
98663ca0550162 Brian Geffon       2020-01-22  564  		goto out;
98663ca0550162 Brian Geffon       2020-01-22  565  	}
98663ca0550162 Brian Geffon       2020-01-22  566  
ecc1a8993751de Al Viro            2009-11-24  567  	vma = vma_to_resize(addr, old_len, new_len, &charged);
                                                        ^^^^^^^^^^^^^^^^^^^^

ecc1a8993751de Al Viro            2009-11-24  568  	if (IS_ERR(vma)) {
ecc1a8993751de Al Viro            2009-11-24  569  		ret = PTR_ERR(vma);
ecc1a8993751de Al Viro            2009-11-24  570  		goto out;
ecc1a8993751de Al Viro            2009-11-24  571  	}
ecc1a8993751de Al Viro            2009-11-24  572  
097eed103862f9 Al Viro            2009-11-24  573  	map_flags = MAP_FIXED;

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-23  1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon
                   ` (2 preceding siblings ...)
  2020-01-27  4:46 ` [PATCH] " Dan Carpenter
@ 2020-01-27  5:30 ` Brian Geffon
  2020-01-28 15:26   ` Will Deacon
  3 siblings, 1 reply; 21+ messages in thread
From: Brian Geffon @ 2020-01-27  5:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Michael S . Tsirkin, Brian Geffon, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes,
	Nathan Chancellor

When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
set, the source mapping will not be removed. Instead it will be
cleared as if a brand new anonymous, private mapping had been created
atomically as part of the mremap() call.  If a userfaultfd was watching
the source, it will continue to watch the new mapping.  For a mapping
that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
mremap() call to fail. MREMAP_DONTUNMAP requires that MREMAP_FIXED is
also used. The final result is two equally sized VMAs where the
destination contains the PTEs of the source.
   
We hope to use this in Chrome OS where with userfaultfd we could write
an anonymous mapping to disk without having to STOP the process or worry
about VMA permission changes.
   
This feature also has a use case in Android, Lokesh Gidra has said
that "As part of using userfaultfd for GC, We'll have to move the physical
pages of the java heap to a separate location. For this purpose mremap
will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
heap, its virtual mapping will be removed as well. Therefore, we'll
require performing mmap immediately after. This is not only time consuming
but also opens a time window where a native thread may call mmap and
reserve the java heap's address range for its own usage. This flag
solves the problem."
   
Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 include/uapi/linux/mman.h |  5 +++--
 mm/mremap.c               | 38 +++++++++++++++++++++++++++++++-------
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index fc1a64c3447b..923cc162609c 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
 
-#define MREMAP_MAYMOVE	1
-#define MREMAP_FIXED	2
+#define MREMAP_MAYMOVE		1
+#define MREMAP_FIXED		2
+#define MREMAP_DONTUNMAP	4
 
 #define OVERCOMMIT_GUESS		0
 #define OVERCOMMIT_ALWAYS		1
diff --git a/mm/mremap.c b/mm/mremap.c
index 122938dcec15..1d164e5fdff0 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr,
-		bool *locked, struct vm_userfaultfd_ctx *uf,
-		struct list_head *uf_unmap)
+		bool *locked, unsigned long flags,
+		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma;
@@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn_moved(vma);
 
+	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
+		if (vm_flags & VM_ACCOUNT)
+			vma->vm_flags |= VM_ACCOUNT;
+
+		goto out;
+	}
+
 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
@@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 			vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
+out:
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		*locked = true;
@@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
-		struct vm_userfaultfd_ctx *uf,
+		unsigned long flags, struct vm_userfaultfd_ctx *uf,
 		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
@@ -551,6 +559,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		goto out;
 	}
 
+	/*
+	 * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
+	 * check that we can expand by old_len and vma_to_resize will handle
+	 * the vma growing.
+	 */
+	if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
+				vma->vm_flags, old_len >> PAGE_SHIFT))) {
+		ret = -ENOMEM;
+		goto out;
+        }
+
 	map_flags = MAP_FIXED;
 	if (vma->vm_flags & VM_MAYSHARE)
 		map_flags |= MAP_SHARED;
@@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (IS_ERR_VALUE(ret))
 		goto out1;
 
-	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
 		       uf_unmap);
 	if (!(offset_in_page(ret)))
 		goto out;
@@ -609,12 +628,16 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	addr = untagged_addr(addr);
 	new_addr = untagged_addr(new_addr);
 
-	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) {
 		return ret;
+	}
 
 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 		return ret;
 
+	if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED))
+		return ret;
+
 	if (offset_in_page(addr))
 		return ret;
 
@@ -634,7 +657,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
 	if (flags & MREMAP_FIXED) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap_early, &uf_unmap);
+				&locked, flags, &uf, &uf_unmap_early,
+				&uf_unmap);
 		goto out;
 	}
 
@@ -712,7 +736,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 		}
 
 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
-			       &locked, &uf, &uf_unmap);
+			       &locked, flags, &uf, &uf_unmap);
 	}
 out:
 	if (offset_in_page(ret)) {
-- 
2.25.0.341.g760bfbb309-goog


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
  2020-01-26  5:16   ` Nathan Chancellor
  2020-01-26 22:06   ` Kirill A. Shutemov
@ 2020-01-27 10:13   ` Florian Weimer
  2020-01-27 22:33     ` Brian Geffon
  2 siblings, 1 reply; 21+ messages in thread
From: Florian Weimer @ 2020-01-27 10:13 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

* Brian Geffon:

> When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> set, the source mapping will not be removed. Instead it will be
> cleared as if a brand new anonymous, private mapping had been created
> atomically as part of the mremap() call.  If a userfaultfd was watching
> the source, it will continue to watch the new mapping.  For a mapping
> that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
> also used. The final result is two equally sized VMAs where the
> destination contains the PTEs of the source.

What will be the protection flags of the source mapping?  Will they
remain unchanged?  Or PROT_NONE?

Thanks,
Florian


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-27 10:13   ` Florian Weimer
@ 2020-01-27 22:33     ` Brian Geffon
  2020-01-30 12:19       ` Florian Weimer
  0 siblings, 1 reply; 21+ messages in thread
From: Brian Geffon @ 2020-01-27 22:33 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

Hi Florian,
copy_vma will make a copy of the existing VMA leaving the old VMA
unchanged, so the source keeps its existing protections, this is what
makes it very useful along with userfaultfd.

Thanks,
Brian


On Mon, Jan 27, 2020 at 2:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Brian Geffon:
>
> > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> > set, the source mapping will not be removed. Instead it will be
> > cleared as if a brand new anonymous, private mapping had been created
> > atomically as part of the mremap() call.  If a userfaultfd was watching
> > the source, it will continue to watch the new mapping.  For a mapping
> > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
> > also used. The final result is two equally sized VMAs where the
> > destination contains the PTEs of the source.
>
> What will be the protection flags of the source mapping?  Will they
> remain unchanged?  Or PROT_NONE?
>
> Thanks,
> Florian
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-26 22:06   ` Kirill A. Shutemov
@ 2020-01-28  1:35     ` Brian Geffon
  2020-01-29 10:46       ` Kirill A. Shutemov
  0 siblings, 1 reply; 21+ messages in thread
From: Brian Geffon @ 2020-01-28  1:35 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

Hi Kirill,
Thanks for taking the time to look at this. I'll update the wording to
make it clear that MREMAP_FIXED is required with MREMAP_DONTUNMAP.

Regarding rmap, you're completely right I'm going to roll a new patch
which will call unlink_anon_vmas() to make sure the rmap is correct,
I'll also explicitly check that the vma is anonymous and not shared
returning EINVAL if not, how does that sound?

Brian

On Sun, Jan 26, 2020 at 2:06 PM Kirill A. Shutemov <kirill@shutemov.name> wrote:
>
> On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote:
> > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> > set, the source mapping will not be removed. Instead it will be
> > cleared as if a brand new anonymous, private mapping had been created
> > atomically as part of the mremap() call.  If a userfaultfd was watching
> > the source, it will continue to watch the new mapping.  For a mapping
> > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is
> > also used.
>
> Implies? From code it looks like it requires MREMAP_FIXED. And
> MREMAP_FIXED requires MREMAP_MAYMOVE. That's strange flag chaining.
> I don't really see need in such dependencies. It maybe indeed implied, not
> requied.
>
> > The final result is two equally sized VMAs where the
> > destination contains the PTEs of the source.
>
> Could you clarify rmap situation here? How the rmap hierarchy will look
> like before and after the operation. Can the new VMA merge with the old
> one? Sounds fishy to me.
>
> > We hope to use this in Chrome OS where with userfaultfd we could write
> > an anonymous mapping to disk without having to STOP the process or worry
> > about VMA permission changes.
> >
> > This feature also has a use case in Android, Lokesh Gidra has said
> > that "As part of using userfaultfd for GC, We'll have to move the physical
> > pages of the java heap to a separate location. For this purpose mremap
> > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> > heap, its virtual mapping will be removed as well. Therefore, we'll
> > require performing mmap immediately after. This is not only time consuming
> > but also opens a time window where a native thread may call mmap and
> > reserve the java heap's address range for its own usage. This flag
> > solves the problem."
>
> --
>  Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-27  5:30 ` [PATCH v3] " Brian Geffon
@ 2020-01-28 15:26   ` Will Deacon
  2020-01-30 10:12     ` Will Deacon
  0 siblings, 1 reply; 21+ messages in thread
From: Will Deacon @ 2020-01-28 15:26 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes,
	Nathan Chancellor

Hi Brian,

On Sun, Jan 26, 2020 at 09:30:56PM -0800, Brian Geffon wrote:
> When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> set, the source mapping will not be removed. Instead it will be
> cleared as if a brand new anonymous, private mapping had been created
> atomically as part of the mremap() call.  If a userfaultfd was watching
> the source, it will continue to watch the new mapping.  For a mapping
> that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> mremap() call to fail. MREMAP_DONTUNMAP requires that MREMAP_FIXED is
> also used. The final result is two equally sized VMAs where the
> destination contains the PTEs of the source.
>    
> We hope to use this in Chrome OS where with userfaultfd we could write
> an anonymous mapping to disk without having to STOP the process or worry
> about VMA permission changes.
>    
> This feature also has a use case in Android, Lokesh Gidra has said
> that "As part of using userfaultfd for GC, We'll have to move the physical
> pages of the java heap to a separate location. For this purpose mremap
> will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> heap, its virtual mapping will be removed as well. Therefore, we'll
> require performing mmap immediately after. This is not only time consuming
> but also opens a time window where a native thread may call mmap and
> reserve the java heap's address range for its own usage. This flag
> solves the problem."

Hmm, this sounds like you're dealing with a multi-threaded environment,
yet your change only supports private mappings. How does that work?

It's also worrying because, with two private mappings of the same anonymous
memory live simultaneously, you run the risk of hitting D-cache aliasing
issues on some architectures and losing coherency between them as a result
(even in a single-threaded scenario). Is userspace just supposed to deal
with this, or should we be enforcing SHMLBA alignment?
 
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
>  include/uapi/linux/mman.h |  5 +++--
>  mm/mremap.c               | 38 +++++++++++++++++++++++++++++++-------
>  2 files changed, 34 insertions(+), 9 deletions(-)

Could you also a include a patch to update the mremap man page, please?

https://www.kernel.org/doc/man-pages/patches.html

Cheers,

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-28  1:35     ` Brian Geffon
@ 2020-01-29 10:46       ` Kirill A. Shutemov
  2020-02-01 21:03         ` Brian Geffon
  2020-02-02  4:17         ` Brian Geffon
  0 siblings, 2 replies; 21+ messages in thread
From: Kirill A. Shutemov @ 2020-01-29 10:46 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

On Mon, Jan 27, 2020 at 05:35:40PM -0800, Brian Geffon wrote:
> Hi Kirill,
> Thanks for taking the time to look at this. I'll update the wording to
> make it clear that MREMAP_FIXED is required with MREMAP_DONTUNMAP.

I still think that chaining flags is strange. The new flag requires all
existing.

And based on the use case you probably don't really need 'fixed'
semantics all the time. The user should be fine with moving the mapping
*somewhere*, not neccessary to the given address.

BTW, name of the flag is confusing. My initial reaction was that it is
variant of MREMAP_FIXED that does't anything at the target address.
Like MAP_FIXED vs. MAP_FIXED_NOREPLACE.

Any better options for the flag name? (I have none)

> Regarding rmap, you're completely right I'm going to roll a new patch
> which will call unlink_anon_vmas() to make sure the rmap is correct,
> I'll also explicitly check that the vma is anonymous and not shared
> returning EINVAL if not, how does that sound?

Fine, I guess. But let me see the end result.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-28 15:26   ` Will Deacon
@ 2020-01-30 10:12     ` Will Deacon
  0 siblings, 0 replies; 21+ messages in thread
From: Will Deacon @ 2020-01-30 10:12 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes,
	Nathan Chancellor

On Tue, Jan 28, 2020 at 03:26:41PM +0000, Will Deacon wrote:
> On Sun, Jan 26, 2020 at 09:30:56PM -0800, Brian Geffon wrote:
> > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is
> > set, the source mapping will not be removed. Instead it will be
> > cleared as if a brand new anonymous, private mapping had been created
> > atomically as part of the mremap() call.  If a userfaultfd was watching
> > the source, it will continue to watch the new mapping.  For a mapping
> > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the
> > mremap() call to fail. MREMAP_DONTUNMAP requires that MREMAP_FIXED is
> > also used. The final result is two equally sized VMAs where the
> > destination contains the PTEs of the source.
> >    
> > We hope to use this in Chrome OS where with userfaultfd we could write
> > an anonymous mapping to disk without having to STOP the process or worry
> > about VMA permission changes.
> >    
> > This feature also has a use case in Android, Lokesh Gidra has said
> > that "As part of using userfaultfd for GC, We'll have to move the physical
> > pages of the java heap to a separate location. For this purpose mremap
> > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
> > heap, its virtual mapping will be removed as well. Therefore, we'll
> > require performing mmap immediately after. This is not only time consuming
> > but also opens a time window where a native thread may call mmap and
> > reserve the java heap's address range for its own usage. This flag
> > solves the problem."
> 
> Hmm, this sounds like you're dealing with a multi-threaded environment,
> yet your change only supports private mappings. How does that work?

Sorry, this was badly worded. I was trying to understand how the GC is
implememented, and whether everything was part of the same process or if
things like memfds or something else were being used to share memory. Having
spoken to Brian off-list, it's all one process...

> It's also worrying because, with two private mappings of the same anonymous
> memory live simultaneously, you run the risk of hitting D-cache aliasing
> issues on some architectures and losing coherency between them as a result
> (even in a single-threaded scenario). Is userspace just supposed to deal
> with this, or should we be enforcing SHMLBA alignment?

... and this was me completely misreading the patch. The old mapping is
still torn down, but then replaced with a new private mapping rather than
being unmapped.

However, looks like there are some issues handling shared mappings with
this patch (and possibly mlock()?), so I'll wait for a new spin.

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-27 22:33     ` Brian Geffon
@ 2020-01-30 12:19       ` Florian Weimer
  0 siblings, 0 replies; 21+ messages in thread
From: Florian Weimer @ 2020-01-30 12:19 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

* Brian Geffon:

> Hi Florian,
> copy_vma will make a copy of the existing VMA leaving the old VMA
> unchanged, so the source keeps its existing protections, this is what
> makes it very useful along with userfaultfd.

I see.  On the other hand, it's impossible to get the PROT_NONE behavior
by a subsequent mprotect call because the mremap has to fail in some
cases in vm.overcommit_memory=2 mode.  But maybe that other behavior can
be provided with a different flag if it turns out to be useful in the
future.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-29 10:46       ` Kirill A. Shutemov
@ 2020-02-01 21:03         ` Brian Geffon
  2020-02-02  4:17         ` Brian Geffon
  1 sibling, 0 replies; 21+ messages in thread
From: Brian Geffon @ 2020-02-01 21:03 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

Hi Kirill,

On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov
<kirill@shutemov.name> wrote:
> And based on the use case you probably don't really need 'fixed'
> semantics all the time. The user should be fine with moving the mapping
> *somewhere*, not neccessary to the given address

This is true and and it simplifies things a bit as for the outlined
use cases the user would not be required to mmap the destination
before hand. Part of the reason I chose to require MREMAP_FIXED was
because mremap need not move the mapping if it can shrink/grow in
place and it seemed a bit awkward to have "MUSTMOVE" behavior without
MAP_FIXED. I'll make this change to drop the requirement on
MREMAP_FIXED in my next patch.

> BTW, name of the flag is confusing. My initial reaction was that it is
> variant of MREMAP_FIXED that does't anything at the target address.
> Like MAP_FIXED vs. MAP_FIXED_NOREPLACE.
>
> Any better options for the flag name? (I have none)

I see your point. Perhaps MREMAP_MOVEPAGES or MREMAP_KEEP_SOURCE? I
struggle to come up with a single name that encapsulates this behavior
but I'll try to think of other ideas before I mail the next patch.
Given that we will drop the requirement on MREMAP_FIXED, perhaps
MOVEPAGES is the better option as it captures that the mapping WILL be
moved?

Thanks again for taking the time to look at this.

Best,
Brian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-01-29 10:46       ` Kirill A. Shutemov
  2020-02-01 21:03         ` Brian Geffon
@ 2020-02-02  4:17         ` Brian Geffon
  2020-02-03 13:09           ` Kirill A. Shutemov
  1 sibling, 1 reply; 21+ messages in thread
From: Brian Geffon @ 2020-02-02  4:17 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov
<kirill@shutemov.name> wrote:
> Any better options for the flag name? (I have none)

The other option is that it's broken up into two new flags the first
MREMAP_MUSTMOVE which can be used regardless of whether or not you're
leaving the original mapping mapped. This would do exactly what it
describes: move the mapping to a new address with or without
MREMAP_FIXED, this keeps consistency with MAYMOVE.

The second flag would be the new MREMAP_DONTUNMAP flag which requires
MREMAP_MUSTMOVE, again with or without MREMAP_FIXED.

What are your thoughts on this?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-02-02  4:17         ` Brian Geffon
@ 2020-02-03 13:09           ` Kirill A. Shutemov
  2020-02-07 20:42             ` Brian Geffon
  0 siblings, 1 reply; 21+ messages in thread
From: Kirill A. Shutemov @ 2020-02-03 13:09 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

On Sun, Feb 02, 2020 at 05:17:53AM +0100, Brian Geffon wrote:
> On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov
> <kirill@shutemov.name> wrote:
> > Any better options for the flag name? (I have none)
> 
> The other option is that it's broken up into two new flags the first
> MREMAP_MUSTMOVE which can be used regardless of whether or not you're
> leaving the original mapping mapped. This would do exactly what it
> describes: move the mapping to a new address with or without
> MREMAP_FIXED, this keeps consistency with MAYMOVE.
> 
> The second flag would be the new MREMAP_DONTUNMAP flag which requires
> MREMAP_MUSTMOVE, again with or without MREMAP_FIXED.
> 
> What are your thoughts on this?

Sounds reasonable.

MREMAP_DONTUNMAP doesn't really convey that you move pages to the new
mapping, but leave empty mapping behind. But I guess there's only so much
you can encode into the name. (Patch to the man page should do the rest)

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-02-03 13:09           ` Kirill A. Shutemov
@ 2020-02-07 20:42             ` Brian Geffon
  2020-02-10 10:35               ` Kirill A. Shutemov
  0 siblings, 1 reply; 21+ messages in thread
From: Brian Geffon @ 2020-02-07 20:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

Hi Kirill,
I started a new thread https://lkml.org/lkml/2020/2/7/640 for my v4
patch. But I wanted to quickly address your comments. Regarding the
concern around the rmap, no changes actually need to be made. If we
were to unlink_anon_vma(vma) and then set vma->anon_vma = NULL, that
would be fine but then as soon as there was a fault the same anon_vma
would be attached since it's a private anonymous mapping. So there is
really nothing to do regarding the rmap.

I considered the two flag approach but since I could not come up with
a concrete use case of MREMAP_MUSTMOVE I decided to just leave the
single MREMAP_DONTUNMAP flag, the two flag approach would be only for
clarifying the operations so I'm not sure it's worth it. (Still trying
to come up with a better name). But I've attached a man page diff to
the latest patch.

Thanks,
Brian

On Mon, Feb 3, 2020 at 5:09 AM Kirill A. Shutemov <kirill@shutemov.name> wrote:
>
> On Sun, Feb 02, 2020 at 05:17:53AM +0100, Brian Geffon wrote:
> > On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov
> > <kirill@shutemov.name> wrote:
> > > Any better options for the flag name? (I have none)
> >
> > The other option is that it's broken up into two new flags the first
> > MREMAP_MUSTMOVE which can be used regardless of whether or not you're
> > leaving the original mapping mapped. This would do exactly what it
> > describes: move the mapping to a new address with or without
> > MREMAP_FIXED, this keeps consistency with MAYMOVE.
> >
> > The second flag would be the new MREMAP_DONTUNMAP flag which requires
> > MREMAP_MUSTMOVE, again with or without MREMAP_FIXED.
> >
> > What are your thoughts on this?
>
> Sounds reasonable.
>
> MREMAP_DONTUNMAP doesn't really convey that you move pages to the new
> mapping, but leave empty mapping behind. But I guess there's only so much
> you can encode into the name. (Patch to the man page should do the rest)
>
> --
>  Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap().
  2020-02-07 20:42             ` Brian Geffon
@ 2020-02-10 10:35               ` Kirill A. Shutemov
  0 siblings, 0 replies; 21+ messages in thread
From: Kirill A. Shutemov @ 2020-02-10 10:35 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML,
	linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli,
	Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes

On Fri, Feb 07, 2020 at 12:42:15PM -0800, Brian Geffon wrote:
> Hi Kirill,
> I started a new thread https://lkml.org/lkml/2020/2/7/640 for my v4
> patch. But I wanted to quickly address your comments. Regarding the
> concern around the rmap, no changes actually need to be made. If we
> were to unlink_anon_vma(vma) and then set vma->anon_vma = NULL, that
> would be fine but then as soon as there was a fault the same anon_vma
> would be attached since it's a private anonymous mapping. So there is
> really nothing to do regarding the rmap.

Okay.

My worry was that we create a new VMA with the same anon_vma *and*
vm_pgoff, but I just realized we can do the same with the current
mremap(2) plus following mmap(2) in the old place. So it's not regression.

I guess we are fine here.

> I considered the two flag approach but since I could not come up with
> a concrete use case of MREMAP_MUSTMOVE I decided to just leave the
> single MREMAP_DONTUNMAP flag, the two flag approach would be only for
> clarifying the operations so I'm not sure it's worth it. (Still trying
> to come up with a better name). But I've attached a man page diff to
> the latest patch.

At least it doesn't have 'FIXED' semantics forced on user. It's fine with
me.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2020-02-10 10:35 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-23  1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon
2020-01-23  3:02 ` Andy Lutomirski
2020-01-23 19:03   ` Brian Geffon
2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
2020-01-26  5:16   ` Nathan Chancellor
2020-01-27  2:21     ` Brian Geffon
2020-01-26 22:06   ` Kirill A. Shutemov
2020-01-28  1:35     ` Brian Geffon
2020-01-29 10:46       ` Kirill A. Shutemov
2020-02-01 21:03         ` Brian Geffon
2020-02-02  4:17         ` Brian Geffon
2020-02-03 13:09           ` Kirill A. Shutemov
2020-02-07 20:42             ` Brian Geffon
2020-02-10 10:35               ` Kirill A. Shutemov
2020-01-27 10:13   ` Florian Weimer
2020-01-27 22:33     ` Brian Geffon
2020-01-30 12:19       ` Florian Weimer
2020-01-27  4:46 ` [PATCH] " Dan Carpenter
2020-01-27  5:30 ` [PATCH v3] " Brian Geffon
2020-01-28 15:26   ` Will Deacon
2020-01-30 10:12     ` Will Deacon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).