From: Andy Lutomirski <luto@kernel.org> To: X86 ML <x86@kernel.org> Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>, Borislav Petkov <bpetkov@suse.de>, Linus Torvalds <torvalds@linux-foundation.org>, Andrew Morton <akpm@linux-foundation.org>, Mel Gorman <mgorman@suse.de>, "linux-mm@kvack.org" <linux-mm@kvack.org>, Nadav Amit <nadav.amit@gmail.com>, Andy Lutomirski <luto@kernel.org>, Rik van Riel <riel@redhat.com>, Dave Hansen <dave.hansen@intel.com>, Nadav Amit <namit@vmware.com>, Michal Hocko <mhocko@suse.com>, Arjan van de Ven <arjan@linux.intel.com> Subject: [PATCH v2 06/11] x86/mm: Refactor flush_tlb_mm_range() to merge local and remote cases Date: Mon, 22 May 2017 15:30:06 -0700 [thread overview] Message-ID: <5bc77be16526cb03e4860f7feec99719ed81f0b6.1495492063.git.luto@kernel.org> (raw) In-Reply-To: <cover.1495492063.git.luto@kernel.org> In-Reply-To: <cover.1495492063.git.luto@kernel.org> The local flush path is very similar to the remote flush path. Merge them. This is intended to make no difference to behavior whatsoever. It removes some code and will make future changes to the flushing mechanics simpler. This patch does remove one small optimization: flush_tlb_mm_range() now has an unconditional smp_mb() instead of using MOV to CR3 or INVLPG as a full barrier when applicable. I think this is okay for a few reasons. First, smp_mb() is quite cheap compared to the cost of a TLB flush. Second, this rearrangement makes a bigger optimization available: with some work on the SMP function call code, we could do the local and remote flushes in parallel. Third, I'm planning a rework of the TLB flush algorithm that will require an atomic operation at the beginning of each flush, and that operation will replace the smp_mb(). 
Cc: Rik van Riel <riel@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Nadav Amit <namit@vmware.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> --- arch/x86/include/asm/tlbflush.h | 1 - arch/x86/mm/tlb.c | 113 +++++++++++++++++----------------------- 2 files changed, 48 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6f439ac92026..9934c7c99213 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -225,7 +225,6 @@ static inline void __flush_tlb_one(unsigned long addr) * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ - struct flush_tlb_info { struct mm_struct *mm; unsigned long start; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3143c9a180e5..12b8812e8926 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -216,22 +216,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * write/read ordering problems. */ -/* - * TLB flush funcation: - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. 
- */ -static void flush_tlb_func(void *info) +static void flush_tlb_func_common(const struct flush_tlb_info *f, + bool local, enum tlb_flush_reason reason) { - const struct flush_tlb_info *f = info; - - inc_irq_stat(irq_tlb_count); - - if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm)) - return; - - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); - if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) { leave_mm(smp_processor_id()); return; @@ -239,7 +226,9 @@ static void flush_tlb_func(void *info) if (f->end == TLB_FLUSH_ALL) { local_flush_tlb(); - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); + if (local) + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + trace_tlb_flush(reason, TLB_FLUSH_ALL); } else { unsigned long addr; unsigned long nr_pages = @@ -249,10 +238,32 @@ static void flush_tlb_func(void *info) __flush_tlb_single(addr); addr += PAGE_SIZE; } - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); + if (local) + count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages); + trace_tlb_flush(reason, nr_pages); } } +static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason) +{ + const struct flush_tlb_info *f = info; + + flush_tlb_func_common(f, true, reason); +} + +static void flush_tlb_func_remote(void *info) +{ + const struct flush_tlb_info *f = info; + + inc_irq_stat(irq_tlb_count); + + if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm)) + return; + + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN); +} + void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) { @@ -269,11 +280,11 @@ void native_flush_tlb_others(const struct cpumask *cpumask, cpu = smp_processor_id(); cpumask = uv_flush_tlb_others(cpumask, info); if (cpumask) - smp_call_function_many(cpumask, flush_tlb_func, + smp_call_function_many(cpumask, flush_tlb_func_remote, (void *)info, 1); return; } - smp_call_function_many(cpumask, flush_tlb_func, + 
smp_call_function_many(cpumask, flush_tlb_func_remote, (void *)info, 1); } @@ -292,61 +303,33 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { - unsigned long addr; - struct flush_tlb_info info; - /* do a global flush by default */ - unsigned long base_pages_to_flush = TLB_FLUSH_ALL; - - preempt_disable(); + int cpu; - if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) - base_pages_to_flush = (end - start) >> PAGE_SHIFT; - if (base_pages_to_flush > tlb_single_page_flush_ceiling) - base_pages_to_flush = TLB_FLUSH_ALL; - - if (current->active_mm != mm) { - /* Synchronize with switch_mm. */ - smp_mb(); - - goto out; - } - - if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) { - leave_mm(smp_processor_id()); + struct flush_tlb_info info = { + .mm = mm, + }; - /* Synchronize with switch_mm. */ - smp_mb(); + cpu = get_cpu(); - goto out; - } + /* Synchronize with switch_mm. */ + smp_mb(); - /* - * Both branches below are implicit full barriers (MOV to CR or - * INVLPG) that synchronize with switch_mm. - */ - if (base_pages_to_flush == TLB_FLUSH_ALL) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - local_flush_tlb(); + /* Should we flush just the requested range? 
*/ + if ((end != TLB_FLUSH_ALL) && + !(vmflag & VM_HUGETLB) && + ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) { + info.start = start; + info.end = end; } else { - /* flush range by one by one 'invlpg' */ - for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - __flush_tlb_single(addr); - } - } - trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush); -out: - info.mm = mm; - if (base_pages_to_flush == TLB_FLUSH_ALL) { info.start = 0UL; info.end = TLB_FLUSH_ALL; - } else { - info.start = start; - info.end = end; } - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + + if (mm == current->active_mm) + flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN); + if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), &info); - preempt_enable(); + put_cpu(); } -- 2.9.3
WARNING: multiple messages have this Message-ID (diff)
From: Andy Lutomirski <luto@kernel.org> To: X86 ML <x86@kernel.org> Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>, Borislav Petkov <bpetkov@suse.de>, Linus Torvalds <torvalds@linux-foundation.org>, Andrew Morton <akpm@linux-foundation.org>, Mel Gorman <mgorman@suse.de>, "linux-mm@kvack.org" <linux-mm@kvack.org>, Nadav Amit <nadav.amit@gmail.com>, Andy Lutomirski <luto@kernel.org>, Rik van Riel <riel@redhat.com>, Dave Hansen <dave.hansen@intel.com>, Nadav Amit <namit@vmware.com>, Michal Hocko <mhocko@suse.com>, Arjan van de Ven <arjan@linux.intel.com> Subject: [PATCH v2 06/11] x86/mm: Refactor flush_tlb_mm_range() to merge local and remote cases Date: Mon, 22 May 2017 15:30:06 -0700 [thread overview] Message-ID: <5bc77be16526cb03e4860f7feec99719ed81f0b6.1495492063.git.luto@kernel.org> (raw) In-Reply-To: <cover.1495492063.git.luto@kernel.org> In-Reply-To: <cover.1495492063.git.luto@kernel.org> The local flush path is very similar to the remote flush path. Merge them. This is intended to make no difference to behavior whatsoever. It removes some code and will make future changes to the flushing mechanics simpler. This patch does remove one small optimization: flush_tlb_mm_range() now has an unconditional smp_mb() instead of using MOV to CR3 or INVLPG as a full barrier when applicable. I think this is okay for a few reasons. First, smp_mb() is quite cheap compared to the cost of a TLB flush. Second, this rearrangement makes a bigger optimization available: with some work on the SMP function call code, we could do the local and remote flushes in parallel. Third, I'm planning a rework of the TLB flush algorithm that will require an atomic operation at the beginning of each flush, and that operation will replace the smp_mb(). 
Cc: Rik van Riel <riel@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Nadav Amit <namit@vmware.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> --- arch/x86/include/asm/tlbflush.h | 1 - arch/x86/mm/tlb.c | 113 +++++++++++++++++----------------------- 2 files changed, 48 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6f439ac92026..9934c7c99213 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -225,7 +225,6 @@ static inline void __flush_tlb_one(unsigned long addr) * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ - struct flush_tlb_info { struct mm_struct *mm; unsigned long start; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3143c9a180e5..12b8812e8926 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -216,22 +216,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * write/read ordering problems. */ -/* - * TLB flush funcation: - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. 
- */ -static void flush_tlb_func(void *info) +static void flush_tlb_func_common(const struct flush_tlb_info *f, + bool local, enum tlb_flush_reason reason) { - const struct flush_tlb_info *f = info; - - inc_irq_stat(irq_tlb_count); - - if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm)) - return; - - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); - if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) { leave_mm(smp_processor_id()); return; @@ -239,7 +226,9 @@ static void flush_tlb_func(void *info) if (f->end == TLB_FLUSH_ALL) { local_flush_tlb(); - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); + if (local) + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + trace_tlb_flush(reason, TLB_FLUSH_ALL); } else { unsigned long addr; unsigned long nr_pages = @@ -249,10 +238,32 @@ static void flush_tlb_func(void *info) __flush_tlb_single(addr); addr += PAGE_SIZE; } - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); + if (local) + count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages); + trace_tlb_flush(reason, nr_pages); } } +static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason) +{ + const struct flush_tlb_info *f = info; + + flush_tlb_func_common(f, true, reason); +} + +static void flush_tlb_func_remote(void *info) +{ + const struct flush_tlb_info *f = info; + + inc_irq_stat(irq_tlb_count); + + if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm)) + return; + + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN); +} + void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) { @@ -269,11 +280,11 @@ void native_flush_tlb_others(const struct cpumask *cpumask, cpu = smp_processor_id(); cpumask = uv_flush_tlb_others(cpumask, info); if (cpumask) - smp_call_function_many(cpumask, flush_tlb_func, + smp_call_function_many(cpumask, flush_tlb_func_remote, (void *)info, 1); return; } - smp_call_function_many(cpumask, flush_tlb_func, + 
smp_call_function_many(cpumask, flush_tlb_func_remote, (void *)info, 1); } @@ -292,61 +303,33 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { - unsigned long addr; - struct flush_tlb_info info; - /* do a global flush by default */ - unsigned long base_pages_to_flush = TLB_FLUSH_ALL; - - preempt_disable(); + int cpu; - if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) - base_pages_to_flush = (end - start) >> PAGE_SHIFT; - if (base_pages_to_flush > tlb_single_page_flush_ceiling) - base_pages_to_flush = TLB_FLUSH_ALL; - - if (current->active_mm != mm) { - /* Synchronize with switch_mm. */ - smp_mb(); - - goto out; - } - - if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) { - leave_mm(smp_processor_id()); + struct flush_tlb_info info = { + .mm = mm, + }; - /* Synchronize with switch_mm. */ - smp_mb(); + cpu = get_cpu(); - goto out; - } + /* Synchronize with switch_mm. */ + smp_mb(); - /* - * Both branches below are implicit full barriers (MOV to CR or - * INVLPG) that synchronize with switch_mm. - */ - if (base_pages_to_flush == TLB_FLUSH_ALL) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - local_flush_tlb(); + /* Should we flush just the requested range? 
*/ + if ((end != TLB_FLUSH_ALL) && + !(vmflag & VM_HUGETLB) && + ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) { + info.start = start; + info.end = end; } else { - /* flush range by one by one 'invlpg' */ - for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - __flush_tlb_single(addr); - } - } - trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush); -out: - info.mm = mm; - if (base_pages_to_flush == TLB_FLUSH_ALL) { info.start = 0UL; info.end = TLB_FLUSH_ALL; - } else { - info.start = start; - info.end = end; } - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + + if (mm == current->active_mm) + flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN); + if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), &info); - preempt_enable(); + put_cpu(); } -- 2.9.3 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2017-05-22 22:32 UTC|newest] Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top 2017-05-22 22:30 [PATCH v2 00/11] x86 TLB flush cleanups, moving toward PCID support Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 01/11] x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range() Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-24 10:21 ` [tip:x86/mm] " tip-bot for Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 02/11] x86/mm: Reduce indentation in flush_tlb_func() Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-24 10:22 ` [tip:x86/mm] " tip-bot for Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 03/11] x86/mm: Make the batched unmap TLB flush API more generic Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-24 10:22 ` [tip:x86/mm] mm, " tip-bot for Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 04/11] x86/mm: Pass flush_tlb_info to flush_tlb_others() etc Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-24 8:18 ` Ingo Molnar 2017-05-24 8:18 ` Ingo Molnar 2017-05-22 22:30 ` [PATCH v2 05/11] x86/mm: Change the leave_mm() condition for local TLB flushes Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski [this message] 2017-05-22 22:30 ` [PATCH v2 06/11] x86/mm: Refactor flush_tlb_mm_range() to merge local and remote cases Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 07/11] x86/mm: Use new merged flush logic in arch_tlbbatch_flush() Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 08/11] x86/mm: Remove the UP tlbflush code; always use the formerly SMP code Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 09/11] x86/mm: Rework lazy TLB to track the actual loaded mm Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 10/11] x86/mm: Be more consistent wrt PAGE_SHIFT vs PAGE_SIZE in tlb flush code Andy Lutomirski 
2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 23:24 ` Nadav Amit 2017-05-22 23:24 ` Nadav Amit 2017-05-22 23:41 ` Andy Lutomirski 2017-05-22 23:41 ` Andy Lutomirski 2017-05-22 22:30 ` [PATCH v2 11/11] x86,kvm: Teach KVM's VMX code that CR3 isn't a constant Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski 2017-05-22 22:30 ` Andy Lutomirski
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=5bc77be16526cb03e4860f7feec99719ed81f0b6.1495492063.git.luto@kernel.org \ --to=luto@kernel.org \ --cc=akpm@linux-foundation.org \ --cc=arjan@linux.intel.com \ --cc=bpetkov@suse.de \ --cc=dave.hansen@intel.com \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=mgorman@suse.de \ --cc=mhocko@suse.com \ --cc=nadav.amit@gmail.com \ --cc=namit@vmware.com \ --cc=riel@redhat.com \ --cc=torvalds@linux-foundation.org \ --cc=x86@kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.