From: Nadav Amit <nadav.amit@gmail.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Nadav Amit <namit@vmware.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andy Lutomirski <luto@kernel.org>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Will Deacon <will@kernel.org>, Yu Zhao <yuzhao@google.com>,
	Nick Piggin <npiggin@gmail.com>,
	x86@kernel.org
Subject: [RFC 07/20] mm: move x86 tlb_gen to generic code
Date: Sat, 30 Jan 2021 16:11:19 -0800	[thread overview]
Message-ID: <20210131001132.3368247-8-namit@vmware.com> (raw)
In-Reply-To: <20210131001132.3368247-1-namit@vmware.com>

From: Nadav Amit <namit@vmware.com>

x86 currently has TLB-generation tracking logic that could be used by
additional architectures (as long as they implement some additional
logic).
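
For illustration only (not part of this patch), the increment-then-flush
protocol that tlb_gen implements looks roughly like the sketch below;
example_change_and_flush() is a hypothetical caller, while set_pte(),
flush_tlb_mm() and the inc_mm_tlb_gen() helper being moved here are
existing primitives:

  #include <linux/mm_types.h>
  #include <linux/pgtable.h>
  #include <asm/tlbflush.h>

  /*
   * Illustrative only: publish the page-table change, bump the mm-wide
   * generation, then flush, so the low-level flushing code can tell
   * what still needs flushing.
   */
  static void example_change_and_flush(struct mm_struct *mm, pte_t *ptep,
                                       pte_t newpte)
  {
          set_pte(ptep, newpte);  /* 1. update the page tables */
          inc_mm_tlb_gen(mm);     /* 2. bump mm->tlb_gen; also a full barrier */
          flush_tlb_mm(mm);       /* 3. flush the now-stale TLB entries */
  }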

Extract the relevant pieces of code from x86 into generic TLB code. This
allows the subsequent "fine granularity deferred TLB flushes detection"
patches to be written without making them x86-specific.

Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Nick Piggin <npiggin@gmail.com>
Cc: x86@kernel.org
---
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/mmu.h         | 10 --------
 arch/x86/include/asm/mmu_context.h |  1 -
 arch/x86/include/asm/tlbflush.h    | 18 --------------
 arch/x86/mm/tlb.c                  |  8 +++---
 drivers/firmware/efi/efi.c         |  1 +
 include/linux/mm_types.h           | 39 ++++++++++++++++++++++++++++++
 init/Kconfig                       |  6 +++++
 kernel/fork.c                      |  2 ++
 mm/init-mm.c                       |  1 +
 mm/rmap.c                          |  9 ++++++-
 11 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 591efb2476bc..6bd4d626a6b3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,6 +86,7 @@ config X86
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_DEBUG_WX
+	select ARCH_HAS_TLB_GENERATIONS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5d7494631ea9..134454956c96 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -23,16 +23,6 @@ typedef struct {
 	 */
 	u64 ctx_id;
 
-	/*
-	 * Any code that needs to do any sort of TLB flushing for this
-	 * mm will first make its changes to the page tables, then
-	 * increment tlb_gen, then flush.  This lets the low-level
-	 * flushing code keep track of what needs flushing.
-	 *
-	 * This is not used on Xen PV.
-	 */
-	atomic64_t tlb_gen;
-
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 	struct rw_semaphore	ldt_usr_sem;
 	struct ldt_struct	*ldt;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 27516046117a..e7597c642270 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -105,7 +105,6 @@ static inline int init_new_context(struct task_struct *tsk,
 	mutex_init(&mm->context.lock);
 
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
-	atomic64_set(&mm->context.tlb_gen, 0);
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index a617dc0a9b06..2110b98026a7 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -235,24 +235,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
 }
 
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-{
-	/*
-	 * Bump the generation count.  This also serves as a full barrier
-	 * that synchronizes with switch_mm(): callers are required to order
-	 * their read of mm_cpumask after their writes to the paging
-	 * structures.
-	 */
-	return atomic64_inc_return(&mm->context.tlb_gen);
-}
-
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-					struct mm_struct *mm)
-{
-	inc_mm_tlb_gen(mm);
-	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
-}
-
 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 
 static inline bool pte_may_need_flush(pte_t oldpte, pte_t newpte)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 569ac1d57f55..7ab21430be41 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -511,7 +511,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * the TLB shootdown code.
 		 */
 		smp_mb();
-		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+		next_tlb_gen = atomic64_read(&next->tlb_gen);
 		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
 				next_tlb_gen)
 			return;
@@ -546,7 +546,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 */
 		if (next != &init_mm)
 			cpumask_set_cpu(cpu, mm_cpumask(next));
-		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+		next_tlb_gen = atomic64_read(&next->tlb_gen);
 
 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
 
@@ -618,7 +618,7 @@ void initialize_tlbstate_and_flush(void)
 {
 	int i;
 	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+	u64 tlb_gen = atomic64_read(&init_mm.tlb_gen);
 	unsigned long cr3 = __read_cr3();
 
 	/* Assert that CR3 already references the right mm. */
@@ -667,7 +667,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	 */
 	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+	u64 mm_tlb_gen = atomic64_read(&loaded_mm->tlb_gen);
 	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
 
 	/* This code cannot presently handle being reentered. */
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index df3f9bcab581..02a6a1c81576 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -62,6 +62,7 @@ struct mm_struct efi_mm = {
 	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
 	.cpu_bitmap		= { [BITS_TO_LONGS(NR_CPUS)] = 0},
+	INIT_TLB_GENERATIONS
 };
 
 struct workqueue_struct *efi_rts_wq;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0974ad501a47..2035ac319c2b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -560,6 +560,17 @@ struct mm_struct {
 
 #ifdef CONFIG_IOMMU_SUPPORT
 		u32 pasid;
+#endif
+#ifdef CONFIG_ARCH_HAS_TLB_GENERATIONS
+		/*
+		 * Any code that needs to do any sort of TLB flushing for this
+		 * mm will first make its changes to the page tables, then
+		 * increment tlb_gen, then flush.  This lets the low-level
+		 * flushing code keep track of what needs flushing.
+		 *
+		 * This is not used on Xen PV.
+		 */
+		atomic64_t tlb_gen;
 #endif
 	} __randomize_layout;
 
@@ -676,6 +687,34 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
 	return atomic_read(&mm->tlb_flush_pending) > 1;
 }
 
+#ifdef CONFIG_ARCH_HAS_TLB_GENERATIONS
+static inline void init_mm_tlb_gen(struct mm_struct *mm)
+{
+	atomic64_set(&mm->tlb_gen, 0);
+}
+
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+	/*
+	 * Bump the generation count.  This also serves as a full barrier
+	 * that synchronizes with switch_mm(): callers are required to order
+	 * their read of mm_cpumask after their writes to the paging
+	 * structures.
+	 */
+	return atomic64_inc_return(&mm->tlb_gen);
+}
+
+#define INIT_TLB_GENERATIONS					\
+	.tlb_gen	= ATOMIC64_INIT(0),
+
+#else
+
+static inline void init_mm_tlb_gen(struct mm_struct *mm) { }
+
+#define INIT_TLB_GENERATIONS
+
+#endif
+
 struct vm_fault;
 
 /**
diff --git a/init/Kconfig b/init/Kconfig
index b77c60f8b963..3d11a0f7c8cc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -842,6 +842,12 @@ config ARCH_SUPPORTS_NUMA_BALANCING
 # and the refill costs are offset by the savings of sending fewer IPIs.
 config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	bool
+	depends on ARCH_HAS_TLB_GENERATIONS
+
+#
+# For architectures that track the TLB generation of each address space.
+config ARCH_HAS_TLB_GENERATIONS
+	bool
 
 config CC_HAS_INT128
 	def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..3e735a86ab2c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1027,6 +1027,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);
+	init_mm_tlb_gen(mm);
+
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
 #endif
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 153162669f80..ef3a471f4de4 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -38,5 +38,6 @@ struct mm_struct init_mm = {
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 	.user_ns	= &init_user_ns,
 	.cpu_bitmap	= CPU_BITS_NONE,
+	INIT_TLB_GENERATIONS
 	INIT_MM_CONTEXT(init_mm)
 };
diff --git a/mm/rmap.c b/mm/rmap.c
index 08c56aaf72eb..9655e1fc328a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -613,11 +613,18 @@ void try_to_unmap_flush_dirty(void)
 		try_to_unmap_flush();
 }
 
+static inline void tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+				   struct mm_struct *mm)
+{
+	inc_mm_tlb_gen(mm);
+	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+}
+
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
-	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
+	tlbbatch_add_mm(&tlb_ubc->arch, mm);
 	tlb_ubc->flush_required = true;
 
 	/*
-- 
2.25.1


Thread overview: 65+ messages
2021-01-31  0:11 [RFC 00/20] TLB batching consolidation and enhancements Nadav Amit
2021-01-31  0:11 ` [RFC 01/20] mm/tlb: fix fullmm semantics Nadav Amit
2021-01-31  1:02   ` Andy Lutomirski
2021-01-31  1:19     ` Nadav Amit
2021-01-31  2:57       ` Andy Lutomirski
2021-02-01  7:30         ` Nadav Amit
2021-02-01 11:36   ` Peter Zijlstra
2021-02-02  9:32     ` Nadav Amit
2021-02-02 11:00       ` Peter Zijlstra
2021-02-02 21:35         ` Nadav Amit
2021-02-03  9:44           ` Will Deacon
2021-02-04  3:20             ` Nadav Amit
2021-01-31  0:11 ` [RFC 02/20] mm/mprotect: use mmu_gather Nadav Amit
2021-01-31  0:11 ` [RFC 03/20] mm/mprotect: do not flush on permission promotion Nadav Amit
2021-01-31  1:07   ` Andy Lutomirski
2021-01-31  1:17     ` Nadav Amit
2021-01-31  2:59       ` Andy Lutomirski
     [not found]     ` <7a6de15a-a570-31f2-14d6-a8010296e694@citrix.com>
2021-02-01  5:58       ` Nadav Amit
2021-02-01 15:38         ` Andrew Cooper
2021-01-31  0:11 ` [RFC 04/20] mm/mapping_dirty_helpers: use mmu_gather Nadav Amit
2021-01-31  0:11 ` [RFC 05/20] mm/tlb: move BATCHED_UNMAP_TLB_FLUSH to tlb.h Nadav Amit
2021-01-31  0:11 ` [RFC 06/20] fs/task_mmu: use mmu_gather interface of clear-soft-dirty Nadav Amit
2021-01-31  0:11 ` Nadav Amit [this message]
2021-01-31 18:26   ` [RFC 07/20] mm: move x86 tlb_gen to generic code Andy Lutomirski
2021-01-31  0:11 ` [RFC 08/20] mm: store completed TLB generation Nadav Amit
2021-01-31 20:32   ` Andy Lutomirski
2021-02-01  7:28     ` Nadav Amit
2021-02-01 16:53       ` Andy Lutomirski
2021-02-01 11:52   ` Peter Zijlstra
2021-01-31  0:11 ` [RFC 09/20] mm: create pte/pmd_tlb_flush_pending() Nadav Amit
2021-01-31  0:11 ` [RFC 10/20] mm: add pte_to_page() Nadav Amit
2021-01-31  0:11 ` [RFC 11/20] mm/tlb: remove arch-specific tlb_start/end_vma() Nadav Amit
2021-02-01 12:09   ` Peter Zijlstra
2021-02-02  6:41     ` Nicholas Piggin
2021-02-02  7:20       ` Nadav Amit
2021-02-02  9:31         ` Peter Zijlstra
2021-02-02  9:54           ` Nadav Amit
2021-02-02 11:04             ` Peter Zijlstra
2021-01-31  0:11 ` [RFC 12/20] mm/tlb: save the VMA that is flushed during tlb_start_vma() Nadav Amit
2021-02-01 12:28   ` Peter Zijlstra
2021-01-31  0:11 ` [RFC 13/20] mm/tlb: introduce tlb_start_ptes() and tlb_end_ptes() Nadav Amit
     [not found]   ` <YBaBcc2jEGaxuxH0@fedora.tometzki.de>
2021-02-01  7:29     ` Nadav Amit
2021-02-01 13:19   ` Peter Zijlstra
2021-02-01 23:00     ` Nadav Amit
2021-01-31  0:11 ` [RFC 14/20] mm: move inc/dec_tlb_flush_pending() to mmu_gather.c Nadav Amit
2021-01-31  0:11 ` [RFC 15/20] mm: detect deferred TLB flushes in vma granularity Nadav Amit
2021-02-01 22:04   ` Nadav Amit
2021-02-02  0:14     ` Andy Lutomirski
2021-02-02 20:51       ` Nadav Amit
2021-02-04  4:35         ` Andy Lutomirski
2021-01-31  0:11 ` [RFC 16/20] mm/tlb: per-page table generation tracking Nadav Amit
2021-01-31  0:11 ` [RFC 17/20] mm/tlb: updated completed deferred TLB flush conditionally Nadav Amit
2021-01-31  0:11 ` [RFC 18/20] mm: make mm_cpumask() volatile Nadav Amit
2021-01-31  0:11 ` [RFC 19/20] lib/cpumask: introduce cpumask_atomic_or() Nadav Amit
2021-01-31  0:11 ` [RFC 20/20] mm/rmap: avoid potential races Nadav Amit
2021-08-23  8:05   ` Huang, Ying
2021-08-23 15:50     ` Nadav Amit
2021-08-24  0:36       ` Huang, Ying
2021-01-31  0:39 ` [RFC 00/20] TLB batching consolidation and enhancements Andy Lutomirski
2021-01-31  1:08   ` Nadav Amit
2021-01-31  3:30 ` Nicholas Piggin
2021-01-31  7:57   ` Nadav Amit
2021-01-31  8:14     ` Nadav Amit
2021-02-01 12:44     ` Peter Zijlstra
2021-02-02  7:14       ` Nicholas Piggin
