From: Nicholas Piggin <npiggin@gmail.com>
To: linux-mm@kvack.org
Cc: linux-arch@vger.kernel.org, Nadav Amit <nadav.amit@gmail.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Nicholas Piggin <npiggin@gmail.com>,
	Minchan Kim <minchan@kernel.org>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linuxppc-dev@lists.ozlabs.org
Subject: [RFC PATCH 2/3] mm: mmu_gather: track invalidated TLB ranges explicitly for more precise flushing
Date: Tue, 12 Jun 2018 17:16:20 +1000
Message-ID: <20180612071621.26775-3-npiggin@gmail.com>
In-Reply-To: <20180612071621.26775-1-npiggin@gmail.com>

The mmu_gather APIs keep track of the invalidated address range,
including the span covered by invalidated page table pages. Page table
pages with no ptes (and which therefore cannot have TLB entries) still
need to be covered by the invalidation if the processor caches
intermediate levels of the page table.

This allows a backwards-compatible / legacy implementation to cache
page tables without modification, provided it invalidates its page
table cache using its existing TLB invalidation instructions.

However, this additional flush range is not necessary if the
architecture provides explicit page table cache management, or if it
ensures that page table cache entries are never instantiated for walks
that did not reach a valid pte.

This is very noticeable on powerpc in the exec path, in
shift_arg_pages, where the TLB flush for the page table teardown covers
a very large range that ends up being implemented as a full process
flush. This patch adds page_start and page_end fields to mmu_gather
which architectures can use to optimise their TLB flushing.
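
As a rough illustration (not part of this patch; the arch_* helpers
below are hypothetical), an architecture-specific tlb_flush() could
consume the new fields along these lines:

	static inline void tlb_flush(struct mmu_gather *tlb)
	{
		if (tlb->fullmm) {
			/* Hypothetical helper: flush everything for this mm */
			arch_flush_mm(tlb->mm);
			return;
		}

		/*
		 * TLB entries can only exist for the range where ptes were
		 * actually cleared, which page_start/page_end tracks.
		 */
		if (tlb->page_end > tlb->page_start)
			arch_flush_tlb_range(tlb->mm, tlb->page_start,
					     tlb->page_end);

		/*
		 * start/end additionally spans freed page table pages; an
		 * architecture with explicit page table cache management
		 * can flush that cache here (e.g. keyed off need_flush_all)
		 * instead of flushing the whole wider range from the TLB.
		 */
		if (tlb->need_flush_all)
			arch_flush_page_walk_cache(tlb->mm);
	}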

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/asm-generic/tlb.h | 27 +++++++++++++++++++++++++--
 mm/memory.c               |  4 +++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index faddde44de8c..a006f702b4c2 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -96,6 +96,8 @@ struct mmu_gather {
 #endif
 	unsigned long		start;
 	unsigned long		end;
+	unsigned long		page_start;
+	unsigned long		page_end;
 	/* we are in the middle of an operation to clear
 	 * a full mm and can make some optimizations */
 	unsigned int		fullmm : 1,
@@ -128,13 +130,25 @@ static inline void __tlb_adjust_range(struct mmu_gather *tlb,
 	tlb->end = max(tlb->end, address + range_size);
 }
 
+static inline void __tlb_adjust_page_range(struct mmu_gather *tlb,
+				      unsigned long address,
+				      unsigned int range_size)
+{
+	tlb->page_start = min(tlb->page_start, address);
+	tlb->page_end = max(tlb->page_end, address + range_size);
+}
+
+
 static inline void __tlb_reset_range(struct mmu_gather *tlb)
 {
 	if (tlb->fullmm) {
 		tlb->start = tlb->end = ~0;
+		tlb->page_start = tlb->page_end = ~0;
 	} else {
 		tlb->start = TASK_SIZE;
 		tlb->end = 0;
+		tlb->page_start = TASK_SIZE;
+		tlb->page_end = 0;
 	}
 }
 
@@ -210,12 +224,14 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 #define tlb_remove_tlb_entry(tlb, ptep, address)		\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
+		__tlb_adjust_page_range(tlb, address, PAGE_SIZE); \
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	     \
 	do {							     \
 		__tlb_adjust_range(tlb, address, huge_page_size(h)); \
+		__tlb_adjust_page_range(tlb, address, huge_page_size(h)); \
 		__tlb_remove_tlb_entry(tlb, ptep, address);	     \
 	} while (0)
 
@@ -230,6 +246,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)			\
 	do {								\
 		__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);	\
+		__tlb_adjust_page_range(tlb, address, HPAGE_PMD_SIZE);	\
 		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
 	} while (0)
 
@@ -244,6 +261,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 #define tlb_remove_pud_tlb_entry(tlb, pudp, address)			\
 	do {								\
 		__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);	\
+		__tlb_adjust_page_range(tlb, address, HPAGE_PUD_SIZE);	\
 		__tlb_remove_pud_tlb_entry(tlb, pudp, address);		\
 	} while (0)
 
@@ -262,6 +280,11 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
  * architecture to do its own odd thing, not cause pain for others
  * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
  *
+ * Powerpc (Book3S 64-bit) with the radix MMU has an architected "page
+ * walk cache" that is invalidated with a specific instruction. It uses
+ * need_flush_all to issue this instruction, which is set by its own
+ * __p??_free_tlb functions.
+ *
  * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
  */
 
@@ -273,7 +296,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 
 #define pmd_free_tlb(tlb, pmdp, address)			\
 	do {							\
-		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
+		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
 		__pmd_free_tlb(tlb, pmdp, address);		\
 	} while (0)
 
@@ -288,7 +311,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 #ifndef __ARCH_HAS_5LEVEL_HACK
 #define p4d_free_tlb(tlb, pudp, address)			\
 	do {							\
-		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
+		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
 		__p4d_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 9d472e00fc2d..a46896b85e54 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -277,8 +277,10 @@ void arch_tlb_finish_mmu(struct mmu_gather *tlb,
 {
 	struct mmu_gather_batch *batch, *next;
 
-	if (force)
+	if (force) {
 		__tlb_adjust_range(tlb, start, end - start);
+		__tlb_adjust_page_range(tlb, start, end - start);
+	}
 
 	tlb_flush_mmu(tlb);
 
-- 
2.17.0
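
For a concrete feel for how the two ranges can diverge during a
teardown like shift_arg_pages, here is a minimal user-space simulation
of the min/max adjustment logic (illustrative only; the addresses and
the adjust() helper are made up, not kernel code):

	#include <stdio.h>

	/* Simplified stand-in for the two ranges tracked by mmu_gather. */
	struct ranges {
		unsigned long start, end;		/* full invalidated span */
		unsigned long page_start, page_end;	/* ptes actually cleared */
	};

	static void adjust(unsigned long *lo, unsigned long *hi,
			   unsigned long addr, unsigned long size)
	{
		if (addr < *lo)
			*lo = addr;
		if (addr + size > *hi)
			*hi = addr + size;
	}

	int main(void)
	{
		struct ranges r = { ~0UL, 0, ~0UL, 0 };
		unsigned long page = 0x1000;

		/* Two ptes cleared: both ranges stay tight. */
		adjust(&r.start, &r.end, 0x7f0000000000UL, page);
		adjust(&r.page_start, &r.page_end, 0x7f0000000000UL, page);
		adjust(&r.start, &r.end, 0x7f0000001000UL, page);
		adjust(&r.page_start, &r.page_end, 0x7f0000001000UL, page);

		/* A page table page freed far away widens only start/end. */
		adjust(&r.start, &r.end, 0x7f0040000000UL, page);

		printf("tlb range:  %#lx-%#lx\n", r.start, r.end);
		printf("page range: %#lx-%#lx\n", r.page_start, r.page_end);
		return 0;
	}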

Thread overview: 42+ messages
2018-06-12  7:16 [RFC PATCH 0/3] couple of TLB flush optimisations Nicholas Piggin
2018-06-12  7:16 ` [RFC PATCH 1/3] Revert "mm: always flush VMA ranges affected by zap_page_range" Nicholas Piggin
2018-06-12 13:53   ` Aneesh Kumar K.V
2018-06-12 18:52   ` Nadav Amit
2018-06-12  7:16 ` [RFC PATCH 2/3] mm: mmu_gather: track invalidated TLB ranges explicitly for more precise flushing Nicholas Piggin [this message]
2018-06-12 18:14   ` Linus Torvalds
2018-06-12  7:16 ` [RFC PATCH 3/3] powerpc/64s/radix: optimise TLB flush with precise TLB ranges in mmu_gather Nicholas Piggin
2018-06-12 18:18   ` Linus Torvalds
2018-06-12 22:31     ` Nicholas Piggin
2018-06-12 22:42       ` Linus Torvalds
2018-06-12 23:09         ` Nicholas Piggin
2018-06-12 23:26           ` Linus Torvalds
2018-06-12 23:39             ` Linus Torvalds
2018-06-13  0:12               ` Nicholas Piggin
2018-06-13  1:10                 ` Linus Torvalds
2018-06-14  2:49                   ` Nicholas Piggin
2018-06-14  6:15                     ` Linus Torvalds
2018-06-14  6:51                       ` Nicholas Piggin
2018-06-12 23:53             ` Nicholas Piggin
