From: Matthew Wilcox <willy@infradead.org>
To: Yang Shi <yang.shi@linux.alibaba.com>
Cc: mhocko@kernel.org, ldufour@linux.vnet.ibm.com,
kirill@shutemov.name, vbabka@suse.cz, akpm@linux-foundation.org,
peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
alexander.shishkin@linux.intel.com, jolsa@redhat.com,
namhyung@kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: Re: [RFC v8 PATCH 3/5] mm: mmap: zap pages with read mmap_sem in munmap
Date: Wed, 15 Aug 2018 14:09:46 -0700 [thread overview]
Message-ID: <20180815210946.GA28919@bombadil.infradead.org> (raw)
In-Reply-To: <20180815191606.GA4201@bombadil.infradead.org>
On Wed, Aug 15, 2018 at 12:16:06PM -0700, Matthew Wilcox wrote:
> (not even compiled, and I can see a good opportunity for combining the
> VM_LOCKED loop with the has_uprobes loop)
I was rushing to get that sent earlier. Here it is, tidied up so that it
actually compiles.
Note the diffstat:
mmap.c | 71 ++++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 42 insertions(+), 29 deletions(-)
I think that's a pretty small extra price to pay for having this improved
scalability.
diff --git a/mm/mmap.c b/mm/mmap.c
index de699523c0b7..b77bb3908f8c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2802,7 +2802,9 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
struct list_head *uf)
{
unsigned long end;
- struct vm_area_struct *vma, *prev, *last;
+ struct vm_area_struct *vma, *prev, *last, *tmp;
+ int res = 0;
+ bool downgrade = false;
if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
return -EINVAL;
@@ -2811,17 +2813,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
if (len == 0)
return -EINVAL;
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+
/* Find the first overlapping VMA */
vma = find_vma(mm, start);
if (!vma)
- return 0;
+ goto unlock;
prev = vma->vm_prev;
- /* we have start < vma->vm_end */
+ /* we have start < vma->vm_end */
/* if it doesn't overlap, we have nothing.. */
end = start + len;
if (vma->vm_start >= end)
- return 0;
+ goto unlock;
/*
* If we need to split any vma, do it now to save pain later.
@@ -2831,28 +2836,27 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
* places tmp vma above, and higher split_vma places tmp vma below.
*/
if (start > vma->vm_start) {
- int error;
-
/*
* Make sure that map_count on return from munmap() will
* not exceed its limit; but let map_count go just above
* its limit temporarily, to help free resources as expected.
*/
+ res = -ENOMEM;
if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
- return -ENOMEM;
+ goto unlock;
- error = __split_vma(mm, vma, start, 0);
- if (error)
- return error;
+ res = __split_vma(mm, vma, start, 0);
+ if (res)
+ goto unlock;
prev = vma;
}
/* Does it split the last one? */
last = find_vma(mm, end);
if (last && end > last->vm_start) {
- int error = __split_vma(mm, last, end, 1);
- if (error)
- return error;
+ res = __split_vma(mm, last, end, 1);
+ if (res)
+ goto unlock;
}
vma = prev ? prev->vm_next : mm->mmap;
@@ -2866,25 +2870,31 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
* split, despite we could. This is unlikely enough
* failure that it's not worth optimizing it for.
*/
- int error = userfaultfd_unmap_prep(vma, start, end, uf);
- if (error)
- return error;
+ res = userfaultfd_unmap_prep(vma, start, end, uf);
+ if (res)
+ goto unlock;
}
/*
* unlock any mlock()ed ranges before detaching vmas
+ * and check to see if there's any reason we might have to hold
+ * the mmap_sem write-locked while unmapping regions.
*/
- if (mm->locked_vm) {
- struct vm_area_struct *tmp = vma;
- while (tmp && tmp->vm_start < end) {
- if (tmp->vm_flags & VM_LOCKED) {
- mm->locked_vm -= vma_pages(tmp);
- munlock_vma_pages_all(tmp);
- }
- tmp = tmp->vm_next;
+ downgrade = true;
+
+ for (tmp = vma; tmp && tmp->vm_start < end; tmp = tmp->vm_next) {
+ if (tmp->vm_flags & VM_LOCKED) {
+ mm->locked_vm -= vma_pages(tmp);
+ munlock_vma_pages_all(tmp);
}
+ if (tmp->vm_file &&
+ has_uprobes(tmp, tmp->vm_start, tmp->vm_end))
+ downgrade = false;
}
+ if (downgrade)
+ downgrade_write(&mm->mmap_sem);
+
/*
* Remove the vma's, and unmap the actual pages
*/
@@ -2896,7 +2906,14 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
/* Fix up all other VM information */
remove_vma_list(mm, vma);
- return 0;
+ res = 0;
+unlock:
+ if (downgrade) {
+ up_read(&mm->mmap_sem);
+ } else {
+ up_write(&mm->mmap_sem);
+ }
+ return res;
}
int vm_munmap(unsigned long start, size_t len)
@@ -2905,11 +2922,7 @@ int vm_munmap(unsigned long start, size_t len)
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
- return -EINTR;
-
ret = do_munmap(mm, start, len, &uf);
- up_write(&mm->mmap_sem);
userfaultfd_unmap_complete(mm, &uf);
return ret;
}
next prev parent reply other threads: [~2018-08-15 21:09 UTC|newest]
Thread overview: 24+ messages / expand [flat|nested] mbox.gz Atom feed top
2018-08-15 18:49 [RFC v8 PATCH 0/5] mm: zap pages with read mmap_sem in munmap for large mapping Yang Shi
2018-08-15 18:49 ` [RFC v8 PATCH 1/5] mm: refactor do_munmap() to extract the common part Yang Shi
2018-08-15 18:49 ` [RFC v8 PATCH 2/5] uprobes: introduce has_uprobes helper Yang Shi
2018-08-22 10:55 ` Vlastimil Babka
2018-08-22 15:07 ` Srikar Dronamraju
2018-08-22 20:51 ` Yang Shi
2018-08-23 15:15 ` Oleg Nesterov
2018-08-23 16:07 ` Yang Shi
2018-08-15 18:49 ` [RFC v8 PATCH 3/5] mm: mmap: zap pages with read mmap_sem in munmap Yang Shi
2018-08-15 19:16 ` Matthew Wilcox
2018-08-15 21:09 ` Matthew Wilcox [this message]
2018-08-15 21:54 ` Yang Shi
2018-08-16 2:46 ` Matthew Wilcox
2018-08-16 6:11 ` Yang Shi
2018-08-22 11:11 ` Vlastimil Babka
2018-08-22 19:20 ` Yang Shi
2018-08-22 11:19 ` Vlastimil Babka
2018-08-22 20:45 ` Yang Shi
2018-08-22 21:10 ` Kirill A. Shutemov
2018-08-22 21:42 ` Dave Hansen
2018-08-22 21:56 ` Yang Shi
2018-08-22 22:03 ` Dave Hansen
2018-08-15 18:49 ` [RFC v8 PATCH 4/5] mm: unmap VM_HUGETLB mappings with optimized path Yang Shi
2018-08-15 18:49 ` [RFC v8 PATCH 5/5] mm: unmap VM_PFNMAP " Yang Shi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180815210946.GA28919@bombadil.infradead.org \
--to=willy@infradead.org \
--cc=acme@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=jolsa@redhat.com \
--cc=kirill@shutemov.name \
--cc=ldufour@linux.vnet.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=vbabka@suse.cz \
--cc=yang.shi@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).