On Mon, Jun 21, 2021 at 02:20:00PM +0530, Bharata B Rao wrote: > H_RPT_INVALIDATE does two types of TLB invalidations: > > 1. Process-scoped invalidations for guests when LPCR[GTSE]=0. > This is currently not used in KVM as GTSE is not usually > disabled in KVM. > 2. Partition-scoped invalidations that an L1 hypervisor does on > behalf of an L2 guest. This is currently handled > by H_TLB_INVALIDATE hcall and this new replaces the old that. > > This commit enables process-scoped invalidations for L1 guests. > Support for process-scoped and partition-scoped invalidations > from/for nested guests will be added separately. > > Process scoped tlbie invalidations from L1 and nested guests > need RS register for TLBIE instruction to contain both PID and > LPID. This patch introduces primitives that execute tlbie > instruction with both PID and LPID set in prepartion for > H_RPT_INVALIDATE hcall. > > A description of H_RPT_INVALIDATE follows: > > int64   /* H_Success: Return code on successful completion */ >         /* H_Busy - repeat the call with the same */ >         /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid > parameters */ > hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT > translation > lookaside information */ >       uint64 id,        /* PID/LPID to invalidate */ >       uint64 target,    /* Invalidation target */ >       uint64 type,      /* Type of lookaside information */ >       uint64 pg_sizes, /* Page sizes */ >       uint64 start,     /* Start of Effective Address (EA) > range (inclusive) */ >       uint64 end)       /* End of EA range (exclusive) */ > > Invalidation targets (target) > ----------------------------- > Core MMU        0x01 /* All virtual processors in the > partition */ > Core local MMU  0x02 /* Current virtual processor */ > Nest MMU        0x04 /* All nest/accelerator agents > in use by the partition */ > > A combination of the above can be specified, > except core and core local. > > Type of translation to invalidate (type) > --------------------------------------- > NESTED       0x0001  /* invalidate nested guest partition-scope */ > TLB          0x0002  /* Invalidate TLB */ > PWC          0x0004  /* Invalidate Page Walk Cache */ > PRT          0x0008  /* Invalidate caching of Process Table > Entries if NESTED is clear */ > PAT          0x0008  /* Invalidate caching of Partition Table > Entries if NESTED is set */ > > A combination of the above can be specified. > > Page size mask (pages) > ---------------------- > 4K              0x01 > 64K             0x02 > 2M              0x04 > 1G              0x08 > All sizes       (-1UL) > > A combination of the above can be specified. > All page sizes can be selected with -1. > > Semantics: Invalidate radix tree lookaside information >            matching the parameters given. > * Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters > are different from the defined values. > * Return H_PARAMETER if NESTED is set and pid is not a valid nested > LPID allocated to this partition > * Return H_P5 if (start, end) doesn't form a valid range. Start and > end should be a valid Quadrant address and  end > start. > * Return H_NotSupported if the partition is not in running in radix > translation mode. > * May invalidate more translation information than requested. > * If start = 0 and end = -1, set the range to cover all valid > addresses. Else start and end should be aligned to 4kB (lower 11 > bits clear). > * If NESTED is clear, then invalidate process scoped lookaside > information. Else pid specifies a nested LPID, and the invalidation > is performed   on nested guest partition table and nested guest > partition scope real addresses. > * If pid = 0 and NESTED is clear, then valid addresses are quadrant 3 > and quadrant 0 spaces, Else valid addresses are quadrant 0. > * Pages which are fully covered by the range are to be invalidated. >   Those which are partially covered are considered outside > invalidation range, which allows a caller to optimally invalidate > ranges that may   contain mixed page sizes. > * Return H_SUCCESS on success. > > Signed-off-by: Bharata B Rao > --- > arch/powerpc/include/asm/mmu_context.h | 9 ++ > arch/powerpc/kvm/book3s_hv.c | 36 ++++++ > arch/powerpc/mm/book3s64/radix_tlb.c | 172 +++++++++++++++++++++++++ > 3 files changed, 217 insertions(+) > > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h > index 4bc45d3ed8b0..b44f291fc909 100644 > --- a/arch/powerpc/include/asm/mmu_context.h > +++ b/arch/powerpc/include/asm/mmu_context.h > @@ -124,8 +124,17 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) > > #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) > extern void radix_kvm_prefetch_workaround(struct mm_struct *mm); > +void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, > + unsigned long type, unsigned long pg_sizes, > + unsigned long start, unsigned long end); > #else > static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { } > +static inline void do_h_rpt_invalidate_prt(unsigned long pid, > + unsigned long lpid, > + unsigned long type, > + unsigned long pg_sizes, > + unsigned long start, > + unsigned long end) { } Since the only plausible caller is in KVM HV code, why do you need the #else clause. > #endif > > extern void switch_cop(struct mm_struct *next); > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index bc0813644666..7e6da4687d88 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -76,6 +76,7 @@ > #include > #include > #include > +#include > > #include "book3s.h" > > @@ -924,6 +925,32 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) > return yield_count; > } > > +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu, > + unsigned long id, unsigned long target, > + unsigned long type, unsigned long pg_sizes, > + unsigned long start, unsigned long end) > +{ > + if (!kvm_is_radix(vcpu->kvm)) > + return H_UNSUPPORTED; > + > + if (end < start) > + return H_P5; > + > + /* > + * Partition-scoped invalidation for nested guests. > + * Not yet supported > + */ > + if (type & H_RPTI_TYPE_NESTED) > + return H_P3; > + > + /* > + * Process-scoped invalidation for L1 guests. > + */ > + do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid, > + type, pg_sizes, start, end); > + return H_SUCCESS; > +} > + > int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > { > unsigned long req = kvmppc_get_gpr(vcpu, 3); > @@ -1132,6 +1159,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > */ > ret = kvmppc_h_svm_init_abort(vcpu->kvm); > break; > + case H_RPT_INVALIDATE: > + ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4), > + kvmppc_get_gpr(vcpu, 5), > + kvmppc_get_gpr(vcpu, 6), > + kvmppc_get_gpr(vcpu, 7), > + kvmppc_get_gpr(vcpu, 8), > + kvmppc_get_gpr(vcpu, 9)); > + break; > > default: > return RESUME_HOST; > @@ -1178,6 +1213,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) > case H_XIRR_X: > #endif > case H_PAGE_INIT: > + case H_RPT_INVALIDATE: > return 1; > } > > diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c > index 409e61210789..cdd98b9e7b15 100644 > --- a/arch/powerpc/mm/book3s64/radix_tlb.c > +++ b/arch/powerpc/mm/book3s64/radix_tlb.c > @@ -130,6 +130,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) > trace_tlbie(0, 0, rb, rs, ric, prs, r); > } > > +static __always_inline void __tlbie_pid_lpid(unsigned long pid, > + unsigned long lpid, > + unsigned long ric) > +{ > + unsigned long rb, rs, prs, r; > + > + rb = PPC_BIT(53); /* IS = 1 */ > + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); > + prs = 1; /* process scoped */ > + r = 1; /* radix format */ > + > + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) > + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); > + trace_tlbie(0, 0, rb, rs, ric, prs, r); > +} > static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) > { > unsigned long rb,rs,prs,r; > @@ -190,6 +205,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, > trace_tlbie(0, 0, rb, rs, ric, prs, r); > } > > +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, > + unsigned long lpid, > + unsigned long ap, unsigned long ric) > +{ > + unsigned long rb, rs, prs, r; > + > + rb = va & ~(PPC_BITMASK(52, 63)); > + rb |= ap << PPC_BITLSHIFT(58); > + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); > + prs = 1; /* process scoped */ > + r = 1; /* radix format */ > + > + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) > + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); > + trace_tlbie(0, 0, rb, rs, ric, prs, r); > +} > + > static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, > unsigned long ap, unsigned long ric) > { > @@ -235,6 +267,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, > } > } > > +static inline void fixup_tlbie_va_range_lpid(unsigned long va, > + unsigned long pid, > + unsigned long lpid, > + unsigned long ap) > +{ > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); > + } > + > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); > + } > +} > + > static inline void fixup_tlbie_pid(unsigned long pid) > { > /* > @@ -254,6 +302,25 @@ static inline void fixup_tlbie_pid(unsigned long pid) > } > } > > +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) > +{ > + /* > + * We can use any address for the invalidation, pick one which is > + * probably unused as an optimisation. > + */ > + unsigned long va = ((1UL << 52) - 1); > + > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); > + } > + > + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { > + asm volatile("ptesync" : : : "memory"); > + __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), > + RIC_FLUSH_TLB); > + } > +} > > static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, > unsigned long ap) > @@ -344,6 +411,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) > asm volatile("eieio; tlbsync; ptesync": : :"memory"); > } > > +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, > + unsigned long ric) > +{ > + asm volatile("ptesync" : : : "memory"); > + > + /* > + * Workaround the fact that the "ric" argument to __tlbie_pid > + * must be a compile-time contraint to match the "i" constraint > + * in the asm statement. > + */ > + switch (ric) { > + case RIC_FLUSH_TLB: > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); > + fixup_tlbie_pid_lpid(pid, lpid); > + break; > + case RIC_FLUSH_PWC: > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); > + break; > + case RIC_FLUSH_ALL: > + default: > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); > + fixup_tlbie_pid_lpid(pid, lpid); > + } > + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); > +} > struct tlbiel_pid { > unsigned long pid; > unsigned long ric; > @@ -469,6 +561,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, > fixup_tlbie_va_range(addr - page_size, pid, ap); > } > > +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, > + unsigned long pid, unsigned long lpid, > + unsigned long page_size, > + unsigned long psize) > +{ > + unsigned long addr; > + unsigned long ap = mmu_get_ap(psize); > + > + for (addr = start; addr < end; addr += page_size) > + __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); > + > + fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); > +} > + > static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, > unsigned long psize, unsigned long ric) > { > @@ -549,6 +655,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, > asm volatile("eieio; tlbsync; ptesync": : :"memory"); > } > > +static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, > + unsigned long pid, unsigned long lpid, > + unsigned long page_size, > + unsigned long psize, bool also_pwc) > +{ > + asm volatile("ptesync" : : : "memory"); > + if (also_pwc) > + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); > + __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); > + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); > +} > + > static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, > unsigned long start, unsigned long end, > unsigned long pid, unsigned long page_size, > @@ -1381,4 +1499,58 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) > } > } > EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); > + > +/* > + * Performs process-scoped invalidations for a given LPID > + * as part of H_RPT_INVALIDATE hcall. > + */ > +void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, > + unsigned long type, unsigned long pg_sizes, > + unsigned long start, unsigned long end) > +{ > + unsigned long psize, nr_pages; > + struct mmu_psize_def *def; > + bool flush_pid; > + > + /* > + * A H_RPTI_TYPE_ALL request implies RIC=3, hence > + * do a single IS=1 based flush. > + */ > + if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { > + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); > + return; > + } > + > + if (type & H_RPTI_TYPE_PWC) > + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); > + > + /* Full PID flush */ > + if (start == 0 && end == -1) > + return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); > + > + /* Do range invalidation for all the valid page sizes */ > + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { > + def = &mmu_psize_defs[psize]; > + if (!(pg_sizes & def->h_rpt_pgsize)) > + continue; > + > + nr_pages = (end - start) >> def->shift; > + flush_pid = nr_pages > tlb_single_page_flush_ceiling; > + > + /* > + * If the number of pages spanning the range is above > + * the ceiling, convert the request into a full PID flush. > + * And since PID flush takes out all the page sizes, there > + * is no need to consider remaining page sizes. > + */ > + if (flush_pid) { > + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); > + return; > + } > + _tlbie_va_range_lpid(start, end, pid, lpid, > + (1UL << def->shift), psize, false); > + } > +} > +EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); > + > #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson