On Mon, Feb 15, 2021 at 12:05:40PM +0530, Bharata B Rao wrote: > H_RPT_INVALIDATE hcall needs to perform process scoped tlbie > invalidations of L1 and nested guests from L0. This needs RS register > for TLBIE instruction to contain both PID and LPID. Introduce > primitives that execute tlbie instruction with both PID > and LPID set in preparation for H_RPT_INVALIDATE hcall. > > While we are here, move RIC_FLUSH definitions to header file > and introduce helper rpti_pgsize_to_psize() that will be needed > by the upcoming hcall. > > Signed-off-by: Bharata B Rao > --- > .../include/asm/book3s/64/tlbflush-radix.h | 18 +++ > arch/powerpc/mm/book3s64/radix_tlb.c | 122 +++++++++++++++++- > 2 files changed, 136 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h > index 94439e0cefc9..aace7e9b2397 100644 > --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h > +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h > @@ -4,6 +4,10 @@ > > #include > > +#define RIC_FLUSH_TLB 0 > +#define RIC_FLUSH_PWC 1 > +#define RIC_FLUSH_ALL 2 > + > struct vm_area_struct; > struct mm_struct; > struct mmu_gather; > @@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize) > return H_RPTI_PAGE_ALL; > } > > +static inline int rpti_pgsize_to_psize(unsigned long page_size) > +{ > + if (page_size == H_RPTI_PAGE_4K) > + return MMU_PAGE_4K; > + if (page_size == H_RPTI_PAGE_64K) > + return MMU_PAGE_64K; > + if (page_size == H_RPTI_PAGE_2M) > + return MMU_PAGE_2M; > + if (page_size == H_RPTI_PAGE_1G) > + return MMU_PAGE_1G; > + else > + return MMU_PAGE_64K; /* Default */ > +} Would it make sense to put the H_RPTI_PAGE_ tags into the mmu_psize_defs table and scan that here, rather than open coding the conversion? 
> + > static inline int mmu_get_ap(int psize) > { > return mmu_psize_defs[psize].ap; > diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c > index fb66d154b26c..097402435303 100644 > --- a/arch/powerpc/mm/book3s64/radix_tlb.c > +++ b/arch/powerpc/mm/book3s64/radix_tlb.c > @@ -18,10 +18,6 @@ > #include > #include > > -#define RIC_FLUSH_TLB 0 > -#define RIC_FLUSH_PWC 1 > -#define RIC_FLUSH_ALL 2 > - > /* > * tlbiel instruction for radix, set invalidation > * i.e., r=1 and is=01 or is=10 or is=11 > @@ -128,6 +124,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) > trace_tlbie(0, 0, rb, rs, ric, prs, r); > } > > +static __always_inline void __tlbie_pid_lpid(unsigned long pid, > + unsigned long lpid, > + unsigned long ric) > +{ > + unsigned long rb, rs, prs, r; > + > + rb = PPC_BIT(53); /* IS = 1 */ > + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); > + prs = 1; /* process scoped */ > + r = 1; /* radix format */ > + > + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) > + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); > + trace_tlbie(0, 0, rb, rs, ric, prs, r); > +} > static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) > { > unsigned long rb,rs,prs,r; > @@ -188,6 +199,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, > trace_tlbie(0, 0, rb, rs, ric, prs, r); > } > > +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, > + unsigned long lpid, > + unsigned long ap, unsigned long ric) > +{ > + unsigned long rb, rs, prs, r; > + > + rb = va & ~(PPC_BITMASK(52, 63)); > + rb |= ap << PPC_BITLSHIFT(58); > + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); > + prs = 1; /* process scoped */ > + r = 1; /* radix format */ > + > + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) > + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); > + trace_tlbie(0, 0, rb, rs, ric, prs, 
r); > +} > + > static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, > unsigned long ap, unsigned long ric) > { > @@ -233,6 +261,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, > } > } > > +static inline void fixup_tlbie_va_range_lpid(unsigned long va, > + unsigned long pid, > + unsigned long lpid, > + unsigned long ap) > +{ > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); > + } > + > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); > + } > +} > + > static inline void fixup_tlbie_pid(unsigned long pid) > { > /* > @@ -252,6 +296,25 @@ static inline void fixup_tlbie_pid(unsigned long pid) > } > } > > +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) > +{ > + /* > + * We can use any address for the invalidation, pick one which is > + * probably unused as an optimisation. 
> + */ > + unsigned long va = ((1UL << 52) - 1); > + > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); > + } > + > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), > + RIC_FLUSH_TLB); > + } > +} > > static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, > unsigned long ap) > @@ -342,6 +405,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) > asm volatile("eieio; tlbsync; ptesync": : :"memory"); > } > > +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, > + unsigned long ric) > +{ > + asm volatile("ptesync" : : : "memory"); > + > + /* > + * Workaround the fact that the "ric" argument to __tlbie_pid > + * must be a compile-time contraint to match the "i" constraint > + * in the asm statement. > + */ > + switch (ric) { > + case RIC_FLUSH_TLB: > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); > + fixup_tlbie_pid_lpid(pid, lpid); > + break; > + case RIC_FLUSH_PWC: > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); > + break; > + case RIC_FLUSH_ALL: > + default: > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); > + fixup_tlbie_pid_lpid(pid, lpid); > + } > + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); > +} > struct tlbiel_pid { > unsigned long pid; > unsigned long ric; > @@ -467,6 +555,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, > fixup_tlbie_va_range(addr - page_size, pid, ap); > } > > +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, > + unsigned long pid, unsigned long lpid, > + unsigned long page_size, > + unsigned long psize) > +{ > + unsigned long addr; > + unsigned long ap = mmu_get_ap(psize); > + > + for (addr = start; addr < end; addr += page_size) > + __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); > + > + 
fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); > +} > + > static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, > unsigned long psize, unsigned long ric) > { > @@ -547,6 +649,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, > asm volatile("eieio; tlbsync; ptesync": : :"memory"); > } > > +static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, > + unsigned long pid, unsigned long lpid, > + unsigned long page_size, > + unsigned long psize, bool also_pwc) > +{ > + asm volatile("ptesync" : : : "memory"); > + if (also_pwc) > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); > + __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); > + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); > +} > + > static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, > unsigned long start, unsigned long end, > unsigned long pid, unsigned long page_size, -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson