From: Brian Geffon <bgeffon@google.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
"Michael S . Tsirkin" <mst@redhat.com>,
Arnd Bergmann <arnd@arndb.de>, Brian Geffon <bgeffon@google.com>,
Sonny Rao <sonnyrao@google.com>, Minchan Kim <minchan@kernel.org>,
Joel Fernandes <joel@joelfernandes.org>,
Lokesh Gidra <lokeshgidra@google.com>,
linux-kernel@vger.kernel.org, linux-api@vger.kernel.org,
Yu Zhao <yuzhao@google.com>, Jesse Barnes <jsbarnes@google.com>
Subject: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap().
Date: Wed, 22 Jan 2020 17:46:27 -0800 [thread overview]
Message-ID: <20200123014627.71720-1-bgeffon@google.com> (raw)
MREMAP_DONTUNMAP is an additional flag that can be used with
MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2)
would then tear down the old vma so subsequent accesses to the vma
cause a segfault. However, with this new flag it will keep the old
vma but zap its PTEs, so any access to the old VMA after that point
will result in a page fault.
This feature will find a use in ChromeOS along with userfaultfd.
Specifically, we will want to register a VMA with userfaultfd and then
pull it out from under a running process. By using MREMAP_DONTUNMAP we
don't have to worry about mprotecting and then potentially racing with
VMA permission changes from a running process.
This feature also has a use case in Android; Lokesh Gidra has said
that "As part of using userfaultfd for GC, We'll have to move the physical
pages of the java heap to a separate location. For this purpose mremap
will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
heap, its virtual mapping will be removed as well. Therefore, we'll
require performing mmap immediately after. This is not only time consuming
but also opens a time window where a native thread may call mmap and
reserve the java heap's address range for its own usage. This flag
solves the problem."
Signed-off-by: Brian Geffon <bgeffon@google.com>
---
include/uapi/linux/mman.h | 5 +++--
mm/mremap.c | 37 ++++++++++++++++++++++++++++++-------
2 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index fc1a64c3447b..923cc162609c 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
-#define MREMAP_MAYMOVE 1
-#define MREMAP_FIXED 2
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+#define MREMAP_DONTUNMAP 4
#define OVERCOMMIT_GUESS 0
#define OVERCOMMIT_ALWAYS 1
diff --git a/mm/mremap.c b/mm/mremap.c
index 122938dcec15..bf97c3eb538b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr,
- bool *locked, struct vm_userfaultfd_ctx *uf,
- struct list_head *uf_unmap)
+ bool *locked, unsigned long flags,
+ struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
@@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
+ if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
+ if (vm_flags & VM_ACCOUNT)
+ vma->vm_flags |= VM_ACCOUNT;
+
+ goto out;
+ }
+
if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
@@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
vma->vm_next->vm_flags |= VM_ACCOUNT;
}
+out:
if (vm_flags & VM_LOCKED) {
mm->locked_vm += new_len >> PAGE_SHIFT;
*locked = true;
@@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
unsigned long new_addr, unsigned long new_len, bool *locked,
- struct vm_userfaultfd_ctx *uf,
+ unsigned long flags, struct vm_userfaultfd_ctx *uf,
struct list_head *uf_unmap_early,
struct list_head *uf_unmap)
{
@@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
old_len = new_len;
}
+ /*
+ * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will
+ * check that we can expand by old_len and vma_to_resize will handle
+ * the vma growing.
+ */
+ if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm,
+ vma->vm_flags, old_len >> PAGE_SHIFT))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
vma = vma_to_resize(addr, old_len, new_len, &charged);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
if (IS_ERR_VALUE(ret))
goto out1;
- ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+ ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
uf_unmap);
if (!(offset_in_page(ret)))
goto out;
@@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
addr = untagged_addr(addr);
new_addr = untagged_addr(new_addr);
- if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+ if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
return ret;
if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
return ret;
+ if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED))
+ return ret;
+
if (offset_in_page(addr))
return ret;
@@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (flags & MREMAP_FIXED) {
ret = mremap_to(addr, old_len, new_addr, new_len,
- &locked, &uf, &uf_unmap_early, &uf_unmap);
+ &locked, flags, &uf, &uf_unmap_early,
+ &uf_unmap);
goto out;
}
@@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
}
ret = move_vma(vma, addr, old_len, new_len, new_addr,
- &locked, &uf, &uf_unmap);
+ &locked, flags, &uf, &uf_unmap);
}
out:
if (offset_in_page(ret)) {
--
2.25.0.341.g760bfbb309-goog
next reply other threads:[~2020-01-23 1:46 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-23 1:46 Brian Geffon [this message]
2020-01-23 3:02 ` [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Andy Lutomirski
2020-01-23 19:03 ` Brian Geffon
2020-01-24 19:06 ` [PATCH v2] " Brian Geffon
2020-01-26 5:16 ` Nathan Chancellor
2020-01-27 2:21 ` Brian Geffon
2020-01-26 22:06 ` Kirill A. Shutemov
2020-01-28 1:35 ` Brian Geffon
2020-01-29 10:46 ` Kirill A. Shutemov
2020-02-01 21:03 ` Brian Geffon
2020-02-02 4:17 ` Brian Geffon
2020-02-03 13:09 ` Kirill A. Shutemov
2020-02-07 20:42 ` Brian Geffon
2020-02-10 10:35 ` Kirill A. Shutemov
2020-01-27 10:13 ` Florian Weimer
2020-01-27 22:33 ` Brian Geffon
2020-01-30 12:19 ` Florian Weimer
2020-01-27 4:46 ` [PATCH] " Dan Carpenter
2020-01-27 5:30 ` [PATCH v3] " Brian Geffon
2020-01-28 15:26 ` Will Deacon
2020-01-30 10:12 ` Will Deacon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200123014627.71720-1-bgeffon@google.com \
--to=bgeffon@google.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=joel@joelfernandes.org \
--cc=jsbarnes@google.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lokeshgidra@google.com \
--cc=minchan@kernel.org \
--cc=mst@redhat.com \
--cc=sonnyrao@google.com \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).