From df3a57d1f6072d07978bafa7dbd9904cdf8f3e13 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Sep 2020 09:56:59 -0700 Subject: [PATCH 1/2] mm: split out the non-present case from copy_one_pte() This is a purely mechanical split of the copy_one_pte() function. It's not immediately obvious when looking at the diff because of the indentation change, but the way to see what is going on in this commit is to use the "-w" flag to not show pure whitespace changes, and you see how the first part of copy_one_pte() is simply lifted out into a separate function. And since the non-present case is marked unlikely, don't make the new function be inlined. Not that gcc really seems to care, since it looks like it will inline it anyway due to the whole "single callsite for static function" logic. In fact, code generation with the function split is almost identical to before. But not marking it inline is the right thing to do. This is pure prep-work and cleanup for subsequent changes. Signed-off-by: Linus Torvalds --- mm/memory.c | 152 ++++++++++++++++++++++++++++------------------------ 1 file changed, 82 insertions(+), 70 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 469af373ae76..31a3ab7d9aa3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -695,6 +695,84 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, * covered by this vma. */ +static unsigned long +copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, + unsigned long addr, int *rss) +{ + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; + struct page *page; + swp_entry_t entry = pte_to_swp_entry(pte); + + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } + rss[MM_SWAPENTS]++; + } else if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); + + rss[mm_counter(page)]++; + + if (is_write_migration_entry(entry) && + is_cow_mapping(vm_flags)) { + /* + * COW mappings require pages in both + * parent and child to be set to read. + */ + make_migration_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + set_pte_at(src_mm, addr, src_pte, pte); + } + } else if (is_device_private_entry(entry)) { + page = device_private_entry_to_page(entry); + + /* + * Update rss count even for unaddressable pages, as + * they should treated just like normal pages in this + * respect. + * + * We will likely want to have some new rss counters + * for unaddressable pages, at some point. But for now + * keep things as they are. + */ + get_page(page); + rss[mm_counter(page)]++; + page_dup_rmap(page, false); + + /* + * We do not preserve soft-dirty information, because so + * far, checkpoint/restore is the only feature that + * requires that. And checkpoint/restore does not work + * when a device driver is involved (you cannot easily + * save and restore device driver state). + */ + if (is_write_device_private_entry(entry) && + is_cow_mapping(vm_flags)) { + make_device_private_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + set_pte_at(src_mm, addr, src_pte, pte); + } + } + set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; +} + static inline unsigned long copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, @@ -705,75 +783,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct page *page; /* pte contains position in swap or file, so copy. */ - if (unlikely(!pte_present(pte))) { - swp_entry_t entry = pte_to_swp_entry(pte); - - if (likely(!non_swap_entry(entry))) { - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - - rss[mm_counter(page)]++; - - if (is_write_migration_entry(entry) && - is_cow_mapping(vm_flags)) { - /* - * COW mappings require pages in both - * parent and child to be set to read. - */ - make_migration_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_soft_dirty(*src_pte)) - pte = pte_swp_mksoft_dirty(pte); - if (pte_swp_uffd_wp(*src_pte)) - pte = pte_swp_mkuffd_wp(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } - } else if (is_device_private_entry(entry)) { - page = device_private_entry_to_page(entry); - - /* - * Update rss count even for unaddressable pages, as - * they should treated just like normal pages in this - * respect. - * - * We will likely want to have some new rss counters - * for unaddressable pages, at some point. But for now - * keep things as they are. - */ - get_page(page); - rss[mm_counter(page)]++; - page_dup_rmap(page, false); - - /* - * We do not preserve soft-dirty information, because so - * far, checkpoint/restore is the only feature that - * requires that. And checkpoint/restore does not work - * when a device driver is involved (you cannot easily - * save and restore device driver state). - */ - if (is_write_device_private_entry(entry) && - is_cow_mapping(vm_flags)) { - make_device_private_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_uffd_wp(*src_pte)) - pte = pte_swp_mkuffd_wp(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } - } - goto out_set_pte; - } + if (unlikely(!pte_present(pte))) + return copy_nonpresent_pte(dst_mm, src_mm, + dst_pte, src_pte, vma, + addr, rss); /* * If it's a COW mapping, write protect it both @@ -807,7 +820,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, rss[mm_counter(page)]++; } -out_set_pte: set_pte_at(dst_mm, addr, dst_pte, pte); return 0; } -- 2.28.0.218.gc12ef3d349