From: Guo Ren <guoren@linux.alibaba.com> The patchset fixes the remaining problems of asid_allocator. - Fixup _PAGE_GLOBAL for kernel virtual address mapping - Optimize tlb_flush with asid & range Changes since v3: - Optimize coding convention for "riscv: Use use_asid_allocator flush TLB" Changes since v2: - Remove PAGE_UP/DOWN usage in tlbflush.h - Optimize variable name Changes since v1: - Drop PAGE_UP wrong fixup - Rebase on clean linux-5.13-rc2 - Add Reviewed-by Guo Ren (2): riscv: Fixup _PAGE_GLOBAL in _PAGE_KERNEL riscv: Use use_asid_allocator flush TLB arch/riscv/include/asm/mmu_context.h | 2 ++ arch/riscv/include/asm/pgtable.h | 3 ++- arch/riscv/include/asm/tlbflush.h | 23 ++++++++++++++++++ arch/riscv/mm/context.c | 2 +- arch/riscv/mm/tlbflush.c | 46 +++++++++++++++++++++++++++++++++--- 5 files changed, 71 insertions(+), 5 deletions(-) -- 2.7.4
From: Guo Ren <guoren@linux.alibaba.com> Kernel virtual address translation should avoid to use ASIDs or it'll cause more TLB-miss and TLB-refill. Because the current ASID in satp belongs to the current process, but the target kernel va TLB entry's ASID still belongs to the previous process. Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Reviewed-by: Anup Patel <anup@brainfault.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Cc: Palmer Dabbelt <palmerdabbelt@google.com> --- arch/riscv/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 9469f46..346a3c6 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -134,7 +134,8 @@ | _PAGE_WRITE \ | _PAGE_PRESENT \ | _PAGE_ACCESSED \ - | _PAGE_DIRTY) + | _PAGE_DIRTY \ + | _PAGE_GLOBAL) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) -- 2.7.4
From: Guo Ren <guoren@linux.alibaba.com> Use static_branch_unlikely(&use_asid_allocator) to keep the origin tlb flush style, so it's no effect on the existing machine. Here are the optimized functions: - flush_tlb_mm - flush_tlb_page - flush_tlb_range All above are based on the below new implement functions: - __sbi_tlb_flush_range_asid - local_flush_tlb_range_asid These functions are based on ASID instead of previous non-ASID tlb_flush implementation which invalidates more useful tlb entries. Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Reviewed-by: Anup Patel <anup.patel@wdc.com> Cc: Palmer Dabbelt <palmerdabbelt@google.com> Cc: Christoph Hellwig <hch@lst.de> --- arch/riscv/include/asm/mmu_context.h | 2 ++ arch/riscv/include/asm/tlbflush.h | 23 ++++++++++++++++++ arch/riscv/mm/context.c | 2 +- arch/riscv/mm/tlbflush.c | 46 +++++++++++++++++++++++++++++++++--- 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index b065941..7030837 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, return 0; } +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); + #include <asm-generic/mmu_context.h> #endif /* _ASM_RISCV_MMU_CONTEXT_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index c84218a..cee476b 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,9 +22,32 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_range_asid(unsigned long start, + unsigned long size, unsigned long asid) +{ + unsigned long tmp, 
end = ALIGN(start + size, PAGE_SIZE); + + for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) { + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (tmp), "r" (asid) + : "memory"); + tmp += PAGE_SIZE; + } +} #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) +#define local_flush_tlb_range_asid(addr) do { } while (0) #endif /* CONFIG_MMU */ #if defined(CONFIG_SMP) && defined(CONFIG_MMU) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 68aa312..45c1b04 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -18,7 +18,7 @@ #ifdef CONFIG_MMU -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 720b443..87b4e52 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -4,6 +4,7 @@ #include <linux/smp.h> #include <linux/sched.h> #include <asm/sbi.h> +#include <asm/mmu_context.h> void flush_tlb_all(void) { @@ -39,18 +40,57 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, put_cpu(); } +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, + unsigned long start, + unsigned long size, + unsigned long asid) +{ + struct cpumask hmask; + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + if (size == -1) + local_flush_tlb_all_asid(asid); + else + local_flush_tlb_range_asid(start, size, asid); + } else { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), + start, size, asid); + } + + put_cpu(); +} + void flush_tlb_mm(struct mm_struct *mm) { - __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1); + if (static_branch_unlikely(&use_asid_allocator)) + __sbi_tlb_flush_range_asid(mm_cpumask(mm), 0, -1, + 
atomic_long_read(&mm->context.id)); + else + __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE); + if (static_branch_unlikely(&use_asid_allocator)) + __sbi_tlb_flush_range_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, + atomic_long_read(&vma->vm_mm->context.id)); + else + __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start); + if (static_branch_unlikely(&use_asid_allocator)) + __sbi_tlb_flush_range_asid(mm_cpumask(vma->vm_mm), start, end - start, + atomic_long_read(&vma->vm_mm->context.id)); + else + __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start); } -- 2.7.4
On Wed, May 26, 2021 at 05:49:21AM +0000, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > Use static_branch_unlikely(&use_asid_allocator) to keep the origin > tlb flush style, so it's no effect on the existing machine. Here > are the optimized functions: > - flush_tlb_mm > - flush_tlb_page > - flush_tlb_range > > All above are based on the below new implement functions: > - __sbi_tlb_flush_range_asid > - local_flush_tlb_range_asid > > These functions are based on ASID instead of previous non-ASID > tlb_flush implementation which invalidates more useful tlb > entries. I still think the commit message is incomplete and rather misleading. Here is what I'd come up with from reading the patch: --------- Subject: add ASID-based tlbflushing methods Implement optimized version of the tlb flushing routines for systems using ASIDs. These are behind the use_asid_allocator static branch to not affect existing systems not using ASIDs. --------- > +static inline void local_flush_tlb_range_asid(unsigned long start, > + unsigned long size, unsigned long asid) > +{ > + unsigned long tmp, end = ALIGN(start + size, PAGE_SIZE); > + > + for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) { > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : > + : "r" (tmp), "r" (asid) > + : "memory"); > + tmp += PAGE_SIZE; > + } This double increments tmp. Also the non-ASID code switches to a global flush once flushing more than a single page. It might be worth documenting the tradeoff in the code. > +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, > + unsigned long start, > + unsigned long size, > + unsigned long asid) > +{ I don't think the calling conventions here are optimal. I'd pass the mm_struct as the first argument, as we can derive both the cpumask and asid from it instead of doing that in the callers. But more importantly I think the static branch check can be moved deeper into the code to avoid a lot of duplication. 
What do you think of this version? diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index b0659413a080..7030837adc1a 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, return 0; } +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); + #include <asm-generic/mmu_context.h> #endif /* _ASM_RISCV_MMU_CONTEXT_H */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 68aa312fc352..45c1b04b105d 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -18,7 +18,7 @@ #ifdef CONFIG_MMU -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 720b443c4528..d8afbb1269d5 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -4,6 +4,33 @@ #include <linux/smp.h> #include <linux/sched.h> #include <asm/sbi.h> +#include <asm/mmu_context.h> + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_range_asid(unsigned long start, + unsigned long size, unsigned long asid) +{ + unsigned long addr, end = ALIGN(start + size, PAGE_SIZE); + + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) + local_flush_tlb_page_asid(addr, asid); +} void flush_tlb_all(void) { @@ -12,28 +39,43 @@ void flush_tlb_all(void) /* * This function must not be called with cmask being null. - * Kernel may panic if cmask is NULL. 
*/ -static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, +static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size) { + struct cpumask *cmask = mm_cpumask(mm); struct cpumask hmask; unsigned int cpuid; + bool broadcast; if (cpumask_empty(cmask)) return; cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + if (static_branch_unlikely(&use_asid_allocator)) { + unsigned long asid = atomic_long_read(&mm->context.id); - if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { - /* local cpu is the only cpu present in cpumask */ - if (size <= PAGE_SIZE) + if (broadcast) { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), + start, size, asid); + } else if (size != -1) { + local_flush_tlb_range_asid(start, size, asid); + } else { + local_flush_tlb_all_asid(asid); + } + } else { + if (broadcast) { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma(cpumask_bits(&hmask), + start, size); + } else if (size <= PAGE_SIZE) { local_flush_tlb_page(start); - else + } else { local_flush_tlb_all(); - } else { - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); + } } put_cpu(); @@ -41,16 +83,16 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, void flush_tlb_mm(struct mm_struct *mm) { - __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1); + __sbi_tlb_flush_range(mm, 0, -1); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE); + __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start); + __sbi_tlb_flush_range(vma->vm_mm, start, end - start); }
On Tue, 25 May 2021 22:49:20 PDT (-0700), guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > Kernel virtual address translation should avoid to use ASIDs or it'll > cause more TLB-miss and TLB-refill. Because the current ASID in satp > belongs to the current process, but the target kernel va TLB entry's > ASID still belongs to the previous process. Sorry, I still can't quite figure out what this is trying to say. I went ahead and re-wrote the commit text to riscv: Use global mappings for kernel pages We map kernel pages into all address spaces, so they can be marked as global. This allows hardware to avoid flushing the kernel mappings when moving between address spaces. LMK if I'm misunderstanding something here, it's on for-next. > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > Reviewed-by: Anup Patel <anup@brainfault.org> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Cc: Palmer Dabbelt <palmerdabbelt@google.com> > --- > arch/riscv/include/asm/pgtable.h | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 9469f46..346a3c6 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -134,7 +134,8 @@ > | _PAGE_WRITE \ > | _PAGE_PRESENT \ > | _PAGE_ACCESSED \ > - | _PAGE_DIRTY) > + | _PAGE_DIRTY \ > + | _PAGE_GLOBAL) > > #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) > #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
On Thu, 27 May 2021 00:09:03 PDT (-0700), Christoph Hellwig wrote: > On Wed, May 26, 2021 at 05:49:21AM +0000, guoren@kernel.org wrote: >> From: Guo Ren <guoren@linux.alibaba.com> >> >> Use static_branch_unlikely(&use_asid_allocator) to keep the origin >> tlb flush style, so it's no effect on the existing machine. Here >> are the optimized functions: >> - flush_tlb_mm >> - flush_tlb_page >> - flush_tlb_range >> >> All above are based on the below new implement functions: >> - __sbi_tlb_flush_range_asid >> - local_flush_tlb_range_asid >> >> These functions are based on ASID instead of previous non-ASID >> tlb_flush implementation which invalidates more useful tlb >> entries. > > I still think the commit message is incomplete and rather misleading. > Here is what I'd come up with from reading the patch: > > --------- > Subject: add ASID-based tlbflushing methods > > Implement optimized version of the tlb flushing routines for systems > using ASIDs. These are behind the use_asid_allocator static branch to > not affect existing systems not using ASIDs. > --------- That seems much better, thanks. >> +static inline void local_flush_tlb_range_asid(unsigned long start, >> + unsigned long size, unsigned long asid) >> +{ >> + unsigned long tmp, end = ALIGN(start + size, PAGE_SIZE); >> + >> + for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) { >> + __asm__ __volatile__ ("sfence.vma %0, %1" >> + : >> + : "r" (tmp), "r" (asid) >> + : "memory"); >> + tmp += PAGE_SIZE; >> + } > > This double increments tmp. > > Also the non-ASID code switches to a global flush once flushing more > than a single page. It might be worth documenting the tradeoff in the > code. For some reason I thought we'd written this down in the commentary of the ISA manual as the suggested heuristic here, but I can't find it so maybe I'm wrong. If it's actually there it would be good to point that out, otherwise some documentation seems fine as it'll probably become a tunable at some point anyway.
>> +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, >> + unsigned long start, >> + unsigned long size, >> + unsigned long asid) >> +{ > > I don't think the calling conventions here are optimal. I'd pass > the mm_struct as the first argument, as we can derive both the cpumask > and asid from it instead of doing that in the callers. > > But more importantly I think the static branch check can be moved deeper > into the code to avoid a lot of duplication. What do you think of this > version? > > diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h > index b0659413a080..7030837adc1a 100644 > --- a/arch/riscv/include/asm/mmu_context.h > +++ b/arch/riscv/include/asm/mmu_context.h > @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, > return 0; > } > > +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); > + > #include <asm-generic/mmu_context.h> > > #endif /* _ASM_RISCV_MMU_CONTEXT_H */ > diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c > index 68aa312fc352..45c1b04b105d 100644 > --- a/arch/riscv/mm/context.c > +++ b/arch/riscv/mm/context.c > @@ -18,7 +18,7 @@ > > #ifdef CONFIG_MMU > > -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > > static unsigned long asid_bits; > static unsigned long num_asids; > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > index 720b443c4528..d8afbb1269d5 100644 > --- a/arch/riscv/mm/tlbflush.c > +++ b/arch/riscv/mm/tlbflush.c > @@ -4,6 +4,33 @@ > #include <linux/smp.h> > #include <linux/sched.h> > #include <asm/sbi.h> > +#include <asm/mmu_context.h> > + > +static inline void local_flush_tlb_all_asid(unsigned long asid) > +{ > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : > + : "r" (asid) > + : "memory"); > +} > + > +static inline void local_flush_tlb_page_asid(unsigned long addr, > + unsigned long asid) > +{ > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : > + : "r" (addr), "r" 
(asid) > + : "memory"); > +} > + > +static inline void local_flush_tlb_range_asid(unsigned long start, > + unsigned long size, unsigned long asid) > +{ > + unsigned long addr, end = ALIGN(start + size, PAGE_SIZE); > + > + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) > + local_flush_tlb_page_asid(addr, asid); > +} > > void flush_tlb_all(void) > { > @@ -12,28 +39,43 @@ void flush_tlb_all(void) > > /* > * This function must not be called with cmask being null. > - * Kernel may panic if cmask is NULL. > */ > -static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, > +static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, > unsigned long size) > { > + struct cpumask *cmask = mm_cpumask(mm); > struct cpumask hmask; > unsigned int cpuid; > + bool broadcast; > > if (cpumask_empty(cmask)) > return; > > cpuid = get_cpu(); > + /* check if the tlbflush needs to be sent to other CPUs */ > + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; > + if (static_branch_unlikely(&use_asid_allocator)) { > + unsigned long asid = atomic_long_read(&mm->context.id); > > - if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { > - /* local cpu is the only cpu present in cpumask */ > - if (size <= PAGE_SIZE) > + if (broadcast) { > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), > + start, size, asid); > + } else if (size != -1) { > + local_flush_tlb_range_asid(start, size, asid); > + } else { > + local_flush_tlb_all_asid(asid); > + } > + } else { > + if (broadcast) { > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > + sbi_remote_sfence_vma(cpumask_bits(&hmask), > + start, size); > + } else if (size <= PAGE_SIZE) { > local_flush_tlb_page(start); > - else > + } else { > local_flush_tlb_all(); > - } else { > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); > + } > } > > put_cpu(); > @@ -41,16 +83,16 @@ static void 
__sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, > > void flush_tlb_mm(struct mm_struct *mm) > { > - __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1); > + __sbi_tlb_flush_range(mm, 0, -1); > } > > void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > { > - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE); > + __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE); > } > > void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > { > - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start); > + __sbi_tlb_flush_range(vma->vm_mm, start, end - start); > } LGTM. I took the first one as IMO they're really distinct issues, LMK if you want to re-spin this one or if I should just take what's here.
On Thu, May 27, 2021 at 3:09 PM Christoph Hellwig <hch@lst.de> wrote: > > On Wed, May 26, 2021 at 05:49:21AM +0000, guoren@kernel.org wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > > > Use static_branch_unlikely(&use_asid_allocator) to keep the origin > > tlb flush style, so it's no effect on the existing machine. Here > > are the optimized functions: > > - flush_tlb_mm > > - flush_tlb_page > > - flush_tlb_range > > > > All above are based on the below new implement functions: > > - __sbi_tlb_flush_range_asid > > - local_flush_tlb_range_asid > > > > These functions are based on ASID instead of previous non-ASID > > tlb_flush implementation which invalidates more useful tlb > > entries. > > I still think the commit message is incomplete and rather misleading. > Here is what I'd come up with from reading the patch: > > --------- > Subject: add ASID-based tlbflushing methods > > Implement optimized version of the tlb flushing routines for systems > using ASIDs. These are behind the use_asid_allocator static branch to > not affect existing systems not using ASIDs. > --------- > > > > +static inline void local_flush_tlb_range_asid(unsigned long start, > > + unsigned long size, unsigned long asid) > > +{ > > + unsigned long tmp, end = ALIGN(start + size, PAGE_SIZE); > > + > > + for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) { > > + __asm__ __volatile__ ("sfence.vma %0, %1" > > + : > > + : "r" (tmp), "r" (asid) > > + : "memory"); > > + tmp += PAGE_SIZE; > > + } > > This double increments tmp. Yes, It's a bug for PATCH V4. Thx for point it out. > > Also the non-ASID code switches to a global flush once flushing more > than a single page. It might be worth documenting the tradeoff in the > code. > > > +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, > > + unsigned long start, > > + unsigned long size, > > + unsigned long asid) > > +{ > > I don't think the calling conventions here are optimal. 
I'd pass > the mm_struct as the first argument, as we can derive both the cpumask > and asid from it instead of doing that in the callers. > > But more importantly I think the static branch check can be moved deeper > into the code to avoid a lot of duplication. What do you think of this > version? Good idea, but I think "Modifying infrastructure and Adding ASID TLB flush" should be separated. I'll try in the next PATCH version. > > diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h > index b0659413a080..7030837adc1a 100644 > --- a/arch/riscv/include/asm/mmu_context.h > +++ b/arch/riscv/include/asm/mmu_context.h > @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, > return 0; > } > > +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); > + > #include <asm-generic/mmu_context.h> > > #endif /* _ASM_RISCV_MMU_CONTEXT_H */ > diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c > index 68aa312fc352..45c1b04b105d 100644 > --- a/arch/riscv/mm/context.c > +++ b/arch/riscv/mm/context.c > @@ -18,7 +18,7 @@ > > #ifdef CONFIG_MMU > > -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > > static unsigned long asid_bits; > static unsigned long num_asids; > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > index 720b443c4528..d8afbb1269d5 100644 > --- a/arch/riscv/mm/tlbflush.c > +++ b/arch/riscv/mm/tlbflush.c > @@ -4,6 +4,33 @@ > #include <linux/smp.h> > #include <linux/sched.h> > #include <asm/sbi.h> > +#include <asm/mmu_context.h> > + > +static inline void local_flush_tlb_all_asid(unsigned long asid) > +{ > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : > + : "r" (asid) > + : "memory"); > +} > + > +static inline void local_flush_tlb_page_asid(unsigned long addr, > + unsigned long asid) > +{ > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : > + : "r" (addr), "r" (asid) > + : "memory"); > +} > + > +static inline void 
local_flush_tlb_range_asid(unsigned long start, > + unsigned long size, unsigned long asid) > +{ > + unsigned long addr, end = ALIGN(start + size, PAGE_SIZE); > + > + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) > + local_flush_tlb_page_asid(addr, asid); > +} > > void flush_tlb_all(void) > { > @@ -12,28 +39,43 @@ void flush_tlb_all(void) > > /* > * This function must not be called with cmask being null. > - * Kernel may panic if cmask is NULL. > */ > -static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, > +static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, > unsigned long size) > { > + struct cpumask *cmask = mm_cpumask(mm); > struct cpumask hmask; > unsigned int cpuid; > + bool broadcast; > > if (cpumask_empty(cmask)) > return; > > cpuid = get_cpu(); > + /* check if the tlbflush needs to be sent to other CPUs */ > + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; > + if (static_branch_unlikely(&use_asid_allocator)) { > + unsigned long asid = atomic_long_read(&mm->context.id); > > - if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { > - /* local cpu is the only cpu present in cpumask */ > - if (size <= PAGE_SIZE) > + if (broadcast) { > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), > + start, size, asid); > + } else if (size != -1) { > + local_flush_tlb_range_asid(start, size, asid); > + } else { > + local_flush_tlb_all_asid(asid); > + } > + } else { > + if (broadcast) { > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > + sbi_remote_sfence_vma(cpumask_bits(&hmask), > + start, size); > + } else if (size <= PAGE_SIZE) { > local_flush_tlb_page(start); > - else > + } else { > local_flush_tlb_all(); > - } else { > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); > + } > } > > put_cpu(); > @@ -41,16 +83,16 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, > > 
void flush_tlb_mm(struct mm_struct *mm) > { > - __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1); > + __sbi_tlb_flush_range(mm, 0, -1); > } > > void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > { > - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE); > + __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE); > } > > void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > { > - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start); > + __sbi_tlb_flush_range(vma->vm_mm, start, end - start); > } -- Best Regards Guo Ren ML: https://lore.kernel.org/linux-csky/
On Sun, May 30, 2021 at 7:42 AM <palmerdabbelt@google.com> wrote: > > On Thu, 27 May 2021 00:09:03 PDT (-0700), Christoph Hellwig wrote: > > On Wed, May 26, 2021 at 05:49:21AM +0000, guoren@kernel.org wrote: > >> From: Guo Ren <guoren@linux.alibaba.com> > >> > >> Use static_branch_unlikely(&use_asid_allocator) to keep the origin > >> tlb flush style, so it's no effect on the existing machine. Here > >> are the optimized functions: > >> - flush_tlb_mm > >> - flush_tlb_page > >> - flush_tlb_range > >> > >> All above are based on the below new implement functions: > >> - __sbi_tlb_flush_range_asid > >> - local_flush_tlb_range_asid > >> > >> These functions are based on ASID instead of previous non-ASID > >> tlb_flush implementation which invalidates more useful tlb > >> entries. > > > > I still think the commit message is incomplete and rather misleading. > > Here is what I'd come up with from reading the patch: > > > > --------- > > Subject: add ASID-based tlbflushing methods > > > > Implement optimized version of the tlb flushing routines for systems > > using ASIDs. These are behind the use_asid_allocator static branch to > > not affect existing systems not using ASIDs. > > --------- > > That seems much better, thanks. > > >> +static inline void local_flush_tlb_range_asid(unsigned long start, > >> + unsigned long size, unsigned long asid) > >> +{ > >> + unsigned long tmp, end = ALIGN(start + size, PAGE_SIZE); > >> + > >> + for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) { > >> + __asm__ __volatile__ ("sfence.vma %0, %1" > >> + : > >> + : "r" (tmp), "r" (asid) > >> + : "memory"); > >> + tmp += PAGE_SIZE; > >> + } > > > > This double increments tmp. > > > > Also the non-ASID code switches to a global flush once flushing more > > than a single page. It might be worth documenting the tradeoff in the > > code. 
> > For some reason I thought we'd written this down in the commentary of > the ISA manual as the suggested heuristic here, but I can't find it so > maybe I'm wrong. If it's actually there it would be good to point that > out, otherwise some documentation seems fine as it'll probably become a > tunable at some point anyway. From my view: "non-ASID code switches to a global flush all" is a hardware bug fixup, so I just reserved it to prevent breaking other machines. The comment for the "non-ASID code" should be another patch. > > >> +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, > >> + unsigned long start, > >> + unsigned long size, > >> + unsigned long asid) > >> +{ > > > > I don't think the calling conventions here are optimal. I'd pass > > the mm_struct as the first argument, as we can derive both the cpumask > > and asid from it instead of doing that in the callers. > > > > But more importantly I think the static branch check can be moved deeper > > into the code to avoid a lot of duplication. What do you think of this > > version?
> > > > diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h > > index b0659413a080..7030837adc1a 100644 > > --- a/arch/riscv/include/asm/mmu_context.h > > +++ b/arch/riscv/include/asm/mmu_context.h > > @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, > > return 0; > > } > > > > +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); > > + > > #include <asm-generic/mmu_context.h> > > > > #endif /* _ASM_RISCV_MMU_CONTEXT_H */ > > diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c > > index 68aa312fc352..45c1b04b105d 100644 > > --- a/arch/riscv/mm/context.c > > +++ b/arch/riscv/mm/context.c > > @@ -18,7 +18,7 @@ > > > > #ifdef CONFIG_MMU > > > > -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > > +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > > > > static unsigned long asid_bits; > > static unsigned long num_asids; > > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > > index 720b443c4528..d8afbb1269d5 100644 > > --- a/arch/riscv/mm/tlbflush.c > > +++ b/arch/riscv/mm/tlbflush.c > > @@ -4,6 +4,33 @@ > > #include <linux/smp.h> > > #include <linux/sched.h> > > #include <asm/sbi.h> > > +#include <asm/mmu_context.h> > > + > > +static inline void local_flush_tlb_all_asid(unsigned long asid) > > +{ > > + __asm__ __volatile__ ("sfence.vma x0, %0" > > + : > > + : "r" (asid) > > + : "memory"); > > +} > > + > > +static inline void local_flush_tlb_page_asid(unsigned long addr, > > + unsigned long asid) > > +{ > > + __asm__ __volatile__ ("sfence.vma %0, %1" > > + : > > + : "r" (addr), "r" (asid) > > + : "memory"); > > +} > > + > > +static inline void local_flush_tlb_range_asid(unsigned long start, > > + unsigned long size, unsigned long asid) > > +{ > > + unsigned long addr, end = ALIGN(start + size, PAGE_SIZE); > > + > > + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) > > + local_flush_tlb_page_asid(addr, asid); > > +} > > > > void flush_tlb_all(void) > > { > > @@ -12,28 
+39,43 @@ void flush_tlb_all(void) > > > > /* > > * This function must not be called with cmask being null. > > - * Kernel may panic if cmask is NULL. > > */ > > -static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, > > +static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, > > unsigned long size) > > { > > + struct cpumask *cmask = mm_cpumask(mm); > > struct cpumask hmask; > > unsigned int cpuid; > > + bool broadcast; > > > > if (cpumask_empty(cmask)) > > return; > > > > cpuid = get_cpu(); > > + /* check if the tlbflush needs to be sent to other CPUs */ > > + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; > > + if (static_branch_unlikely(&use_asid_allocator)) { > > + unsigned long asid = atomic_long_read(&mm->context.id); > > > > - if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { > > - /* local cpu is the only cpu present in cpumask */ > > - if (size <= PAGE_SIZE) > > + if (broadcast) { > > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > > + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), > > + start, size, asid); > > + } else if (size != -1) { > > + local_flush_tlb_range_asid(start, size, asid); > > + } else { > > + local_flush_tlb_all_asid(asid); > > + } > > + } else { > > + if (broadcast) { > > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > > + sbi_remote_sfence_vma(cpumask_bits(&hmask), > > + start, size); > > + } else if (size <= PAGE_SIZE) { > > local_flush_tlb_page(start); > > - else > > + } else { > > local_flush_tlb_all(); > > - } else { > > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > > - sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); > > + } > > } > > > > put_cpu(); > > @@ -41,16 +83,16 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, > > > > void flush_tlb_mm(struct mm_struct *mm) > > { > > - __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1); > > + __sbi_tlb_flush_range(mm, 0, -1); > > } > > > > void flush_tlb_page(struct vm_area_struct *vma, unsigned 
long addr) > > { > > - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE); > > + __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE); > > } > > > > void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > > unsigned long end) > > { > > - __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start); > > + __sbi_tlb_flush_range(vma->vm_mm, start, end - start); > > } > > LGTM. I took the first one as IMO they're really distinct issues, LMK > if you want to re-spin this one or if I should just take what's here. -- Best Regards Guo Ren ML: https://lore.kernel.org/linux-csky/
On Sun, May 30, 2021 at 7:42 AM <palmerdabbelt@google.com> wrote: > > On Tue, 25 May 2021 22:49:20 PDT (-0700), guoren@kernel.org wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > > > Kernel virtual address translation should avoid using ASIDs or it'll > > cause more TLB-miss and TLB-refill. Because the current ASID in satp > > belongs to the current process, but the target kernel va TLB entry's > > ASID still belongs to the previous process. > > Sorry, I still can't quite figure out what this is trying to say. I > went ahead and re-wrote the commit text to > > riscv: Use global mappings for kernel pages > > We map kernel pages into all address spaces, so they can be marked as > global. This allows hardware to avoid flushing the kernel mappings when > moving between address spaces. Okay > > LMK if I'm misunderstanding something here, it's on for-next. > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > Reviewed-by: Anup Patel <anup@brainfault.org> > > Reviewed-by: Christoph Hellwig <hch@lst.de> > > Cc: Palmer Dabbelt <palmerdabbelt@google.com> > > --- > > arch/riscv/include/asm/pgtable.h | 3 ++- > > 1 file changed, 2 insertions(+), 1 deletion(-) > > > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > > index 9469f46..346a3c6 100644 > > --- a/arch/riscv/include/asm/pgtable.h > > +++ b/arch/riscv/include/asm/pgtable.h > > @@ -134,7 +134,8 @@ > > | _PAGE_WRITE \ > > | _PAGE_PRESENT \ > > | _PAGE_ACCESSED \ > > - | _PAGE_DIRTY) > > + | _PAGE_DIRTY \ > > + | _PAGE_GLOBAL) > > > > #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) > > #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) -- Best Regards Guo Ren ML: https://lore.kernel.org/linux-csky/
On Sat, May 29, 2021 at 04:42:37PM -0700, palmerdabbelt@google.com wrote: >> >> Also the non-ASID code switches to a global flush once flushing more >> than a single page. It might be worth documenting the tradeoff in the >> code. > > For some reason I thought we'd written this down in the commentary of the > ISA manual as the suggested heuristic here, but I can't find it so maybe > I'm wrong. If it's actually there it would be good to point that out, > otherwise some documentation seems fine as it'll probably become a tunable > at some point anyway. The real question is why is the heuristic different for the ASID vs non-ASID case? I think that really needs a comment. > LGTM. I took the first one as IMO they're really distinct issues, LMK if > you want to re-spin this one or if I should just take what's here. What distinct issue? The fact that the new code is buggy and uses rather non-optimal calling conventions?