From: Nicholas Piggin <npiggin@gmail.com>
To: linux-mm@kvack.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Bibo Mao <maobibo@loongson.cn>
Subject: [PATCH v3 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion
Date: Sun, 20 Dec 2020 14:55:35 +1000	[thread overview]
Message-ID: <20201220045535.848591-4-npiggin@gmail.com> (raw)
In-Reply-To: <20201220045535.848591-1-npiggin@gmail.com>

Similarly to the previous patch, this optimises the setting of
dirty/accessed bits in ptes: newly inserted ptes are marked young up
front, avoiding the cost of hardware (or a software fault path) setting
the bits on first access.
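The pattern is simply to apply pte_mkyoung() at insertion time rather
than leaving the pte old. As a sketch, using the
insert_page_into_pte_locked() hunk below:

	/* Before: the pte starts out old, so the first access must
	 * set the accessed bit, either via a hardware pte update or,
	 * on some architectures, another page fault. */
	set_pte_at(mm, addr, pte, mk_pte(page, prot));

	/* After: the accessed bit is set up front at insertion. */
	set_pte_at(mm, addr, pte, pte_mkyoung(mk_pte(page, prot)));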

This tidies up the last few cases where dirty/accessed faults can be
seen, and subsumes the pte_sw_mkyoung helper -- it is not just
architectures with explicit software dirty/accessed bits that take
expensive faults to modify ptes.
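Sketching the common shape of the converted fault paths: sites that
previously did

	entry = mk_pte(page, vma->vm_page_prot);
	entry = pte_sw_mkyoung(entry);	/* no-op except on arches with
					 * software-managed accessed bits */

now unconditionally do

	entry = mk_pte(page, vma->vm_page_prot);
	entry = pte_mkyoung(entry);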

The vast majority of the remaining dirty/accessed faults on kbuild
workloads after this patch are from NUMA migration, due to
remove_migration_pte inserting old/clean ptes.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/mips/include/asm/pgtable.h |  2 --
 include/linux/pgtable.h         | 16 ----------------
 mm/huge_memory.c                |  4 ++--
 mm/memory.c                     | 14 +++++++-------
 mm/migrate.c                    |  1 +
 mm/shmem.c                      |  1 +
 mm/userfaultfd.c                |  2 +-
 7 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4f9c37616d42..3275495adccb 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -406,8 +406,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
 	return pte;
 }
 
-#define pte_sw_mkyoung	pte_mkyoung
-
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)	{ return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8fcdfa52eb4b..70d04931dff4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -424,22 +424,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
-/*
- * On some architectures hardware does not set page access bit when accessing
- * memory page, it is responsibilty of software setting this bit. It brings
- * out extra page fault penalty to track page access bit. For optimization page
- * access bit can be set during all page fault flow on these arches.
- * To be differentiate with macro pte_mkyoung, this macro is used on platforms
- * where software maintains page access bit.
- */
-#ifndef pte_sw_mkyoung
-static inline pte_t pte_sw_mkyoung(pte_t pte)
-{
-	return pte;
-}
-#define pte_sw_mkyoung	pte_sw_mkyoung
-#endif
-
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2ca0326b5af..f6719312dc27 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2151,8 +2151,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = maybe_mkwrite(entry, vma);
 			if (!write)
 				entry = pte_wrprotect(entry);
-			if (!young)
-				entry = pte_mkold(entry);
+			if (young)
+				entry = pte_mkyoung(entry);
 			if (soft_dirty)
 				entry = pte_mksoft_dirty(entry);
 			if (uffd_wp)
diff --git a/mm/memory.c b/mm/memory.c
index dd1f364d8ca3..4cebba596660 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1639,7 +1639,7 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
 	get_page(page);
 	inc_mm_counter_fast(mm, mm_counter_file(page));
 	page_add_file_rmap(page, false);
-	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+	set_pte_at(mm, addr, pte, pte_mkyoung(mk_pte(page, prot)));
 	return 0;
 }
 
@@ -1954,10 +1954,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	else
 		entry = pte_mkspecial(pfn_t_pte(pfn, prot));
 
-	if (mkwrite) {
-		entry = pte_mkyoung(entry);
+	entry = pte_mkyoung(entry);
+	if (mkwrite)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	}
 
 	set_pte_at(mm, addr, pte, entry);
 	update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
@@ -2889,7 +2888,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
-		entry = pte_sw_mkyoung(entry);
+		entry = pte_mkyoung(entry);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
@@ -3402,6 +3401,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
+	pte = pte_mkyoung(pte);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -3545,7 +3545,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	__SetPageUptodate(page);
 
 	entry = mk_pte(page, vma->vm_page_prot);
-	entry = pte_sw_mkyoung(entry);
+	entry = pte_mkyoung(entry);
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3821,7 +3821,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
-	entry = pte_sw_mkyoung(entry);
+	entry = pte_mkyoung(entry);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	/* copy-on-write page */
diff --git a/mm/migrate.c b/mm/migrate.c
index ee5e612b4cd8..d33b2bfc846b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2963,6 +2963,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 		}
 	} else {
 		entry = mk_pte(page, vma->vm_page_prot);
+		entry = pte_mkyoung(entry);
 		if (vma->vm_flags & VM_WRITE)
 			entry = pte_mkwrite(pte_mkdirty(entry));
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..4f23b16d6baf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2420,6 +2420,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 		goto out_release;
 
 	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+	_dst_pte = pte_mkyoung(_dst_pte);
 	if (dst_vma->vm_flags & VM_WRITE)
 		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
 	else {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..56c44aa06a7e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -99,7 +99,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
 		goto out_release;
 
-	_dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+	_dst_pte = pte_mkdirty(pte_mkyoung(mk_pte(page, dst_vma->vm_page_prot)));
 	if (dst_vma->vm_flags & VM_WRITE) {
 		if (wp_copy)
 			_dst_pte = pte_mkuffd_wp(_dst_pte);
-- 
2.23.0



Thread overview: 8+ messages
2020-12-20  4:55 [PATCH v3 0/3] mm: improve pte updates and dirty/accessed Nicholas Piggin
2020-12-20  4:55 ` [PATCH v3 1/3] mm/cow: don't bother write protecting already write-protected huge pages Nicholas Piggin
2020-12-20  4:55 ` [PATCH v3 2/3] mm/cow: optimise pte accessed bit handling in fork Nicholas Piggin
2020-12-20  4:55 ` Nicholas Piggin [this message]
2020-12-21 18:21   ` [PATCH v3 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion Hugh Dickins
2020-12-22  3:24     ` Nicholas Piggin
2020-12-23  0:56       ` Huang Pei
2020-12-20 18:00 ` [PATCH v3 0/3] mm: improve pte updates and dirty/accessed Linus Torvalds
