* [PATCH v4 0/3] Support for H_RPT_INVALIDATE in PowerPC KVM
@ 2021-02-15  6:47 ` Bharata B Rao
  0 siblings, 0 replies; 21+ messages in thread
From: Bharata B Rao @ 2021-02-15  6:35 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david

This patchset adds support for the new hcall H_RPT_INVALIDATE
and replaces the nested TLB flush calls with this new hcall
when it is available.

Changes in v4:
-------------
- While reusing the tlb flush routines from radix_tlb.c in v3,
  the setting of the LPID was missed. Take care of this by
  introducing new flush routines that set both the PID and the
  LPID when using the tlbie instruction. This is required for
  process-scoped invalidations from guests (both L1 and
  nested guests). Added a new patch 1/3 for this.
- Added code to handle the H_RPT_INVALIDATE hcall issued by
  a nested guest in the L0 nested-guest exit path.

v3: https://lore.kernel.org/linuxppc-dev/20210105090557.2150104-1-bharata@linux.ibm.com/T/#t

Bharata B Rao (3):
  powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped
    invalidations from guests
  KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM

 Documentation/virt/kvm/api.rst                |  17 ++
 .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
 arch/powerpc/include/asm/kvm_book3s.h         |   3 +
 arch/powerpc/include/asm/mmu_context.h        |  11 ++
 arch/powerpc/kvm/book3s_64_mmu_radix.c        |  27 +++-
 arch/powerpc/kvm/book3s_hv.c                  |  91 +++++++++++
 arch/powerpc/kvm/book3s_hv_nested.c           | 108 ++++++++++++-
 arch/powerpc/kvm/powerpc.c                    |   3 +
 arch/powerpc/mm/book3s64/radix_tlb.c          | 147 +++++++++++++++++-
 include/uapi/linux/kvm.h                      |   1 +
 10 files changed, 415 insertions(+), 11 deletions(-)

-- 
2.26.2



* [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests
  2021-02-15  6:47 ` Bharata B Rao
@ 2021-02-15  6:47   ` Bharata B Rao
  -1 siblings, 0 replies; 21+ messages in thread
From: Bharata B Rao @ 2021-02-15  6:35 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david

The H_RPT_INVALIDATE hcall needs to perform process-scoped tlbie
invalidations of L1 and nested guests from L0. This requires the RS
register of the TLBIE instruction to contain both the PID and the
LPID. Introduce primitives that execute the tlbie instruction with
both PID and LPID set, in preparation for the H_RPT_INVALIDATE hcall.

While we are here, move the RIC_FLUSH definitions to the header file
and introduce the helper rpti_pgsize_to_psize() that will be needed
by the upcoming hcall.
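
A minimal illustrative sketch (not part of the patch) of how the new
primitives compose the RS value, using made-up PID/LPID values and the
PPC_BIT* macros already used in radix_tlb.c:

	/* IBM bit numbering: PID goes in RS[0:31], LPID in RS[32:63] */
	unsigned long pid = 0x20, lpid = 0x2;
	unsigned long rs;

	rs = (pid << PPC_BITLSHIFT(31)) |	/* PPC_BITLSHIFT(31) == 32 */
	     (lpid & ~(PPC_BITMASK(0, 31)));	/* keep only the low 32 bits */
	/* rs == 0x0000002000000002 for the values above */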

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
 arch/powerpc/mm/book3s64/radix_tlb.c          | 122 +++++++++++++++++-
 2 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 94439e0cefc9..aace7e9b2397 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
 
 #include <asm/hvcall.h>
 
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
@@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
 	return H_RPTI_PAGE_ALL;
 }
 
+static inline int rpti_pgsize_to_psize(unsigned long page_size)
+{
+	if (page_size == H_RPTI_PAGE_4K)
+		return MMU_PAGE_4K;
+	if (page_size == H_RPTI_PAGE_64K)
+		return MMU_PAGE_64K;
+	if (page_size == H_RPTI_PAGE_2M)
+		return MMU_PAGE_2M;
+	if (page_size == H_RPTI_PAGE_1G)
+		return MMU_PAGE_1G;
+	else
+		return MMU_PAGE_64K; /* Default */
+}
+
 static inline int mmu_get_ap(int psize)
 {
 	return mmu_psize_defs[psize].ap;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index fb66d154b26c..097402435303 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -18,10 +18,6 @@
 #include <asm/cputhreads.h>
 #include <asm/plpar_wrappers.h>
 
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
 /*
  * tlbiel instruction for radix, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
@@ -128,6 +124,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -188,6 +199,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+					    unsigned long lpid,
+					    unsigned long ap, unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
 					    unsigned long ap, unsigned long ric)
 {
@@ -233,6 +261,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
 	}
 }
 
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+					     unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+	}
+}
+
 static inline void fixup_tlbie_pid(unsigned long pid)
 {
 	/*
@@ -252,6 +296,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
 	}
 }
 
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+				RIC_FLUSH_TLB);
+	}
+}
 
 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
 				       unsigned long ap)
@@ -342,6 +405,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+				   unsigned long ric)
+{
+	asm volatile("ptesync" : : : "memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_pid_lpid
+	 * must be a compile-time constant to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid_lpid(pid, lpid);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid_lpid(pid, lpid);
+	}
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
 struct tlbiel_pid {
 	unsigned long pid;
 	unsigned long ric;
@@ -467,6 +555,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
 	fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					 unsigned long pid, unsigned long lpid,
+					 unsigned long page_size,
+					 unsigned long psize)
+{
+	unsigned long addr;
+	unsigned long ap = mmu_get_ap(psize);
+
+	for (addr = start; addr < end; addr += page_size)
+		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 				      unsigned long psize, unsigned long ric)
 {
@@ -547,6 +649,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					unsigned long pid, unsigned long lpid,
+					unsigned long page_size,
+					unsigned long psize, bool also_pwc)
+{
+	asm volatile("ptesync" : : : "memory");
+	if (also_pwc)
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
 				unsigned long start, unsigned long end,
 				unsigned long pid, unsigned long page_size,
-- 
2.26.2



* [PATCH v4 2/3] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  2021-02-15  6:47 ` Bharata B Rao
@ 2021-02-15  6:47   ` Bharata B Rao
  -1 siblings, 0 replies; 21+ messages in thread
From: Bharata B Rao @ 2021-02-15  6:35 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david

Implement the H_RPT_INVALIDATE hcall and add the KVM capability
KVM_CAP_PPC_RPT_INVALIDATE to indicate its availability.

This hcall does two types of TLB invalidations:

1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
   This is currently not used, as GTSE is not usually
   disabled for KVM guests.
2. Partition-scoped invalidations that an L1 hypervisor does on
   behalf of an L2 guest. This replaces the uses of the existing
   hcall H_TLB_INVALIDATE.

In order to handle process-scoped invalidations of L2, we
intercept the nested exit handling code in L0 only to handle
the H_RPT_INVALIDATE hcall.
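
For illustration only (the argument order mirrors the GPR usage in
kvmppc_pseries_do_hcall() below, and the PID, address and page size
are made up), a guest running with LPCR[GTSE]=0 could issue a
process-scoped flush through the pseries_rpt_invalidate() wrapper that
this series uses from asm/plpar_wrappers.h:

	unsigned long ea = 0x10000000;	/* example effective address */
	long rc;

	/* Flush the TLB entries of one 64K page for PID 0x20 */
	rc = pseries_rpt_invalidate(0x20, H_RPTI_TARGET_CMMU,
				    H_RPTI_TYPE_TLB, H_RPTI_PAGE_64K,
				    ea, ea + (1UL << 16));
	if (rc != H_SUCCESS)
		pr_err("H_RPT_INVALIDATE failed: %ld\n", rc);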

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 Documentation/virt/kvm/api.rst         | 17 +++++
 arch/powerpc/include/asm/kvm_book3s.h  |  3 +
 arch/powerpc/include/asm/mmu_context.h | 11 +++
 arch/powerpc/kvm/book3s_hv.c           | 91 ++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_nested.c    | 96 ++++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c             |  3 +
 arch/powerpc/mm/book3s64/radix_tlb.c   | 25 +++++++
 include/uapi/linux/kvm.h               |  1 +
 8 files changed, 247 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 99ceb978c8b0..416c36aa35d4 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6038,6 +6038,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
 can then handle to implement model specific MSR handling and/or user notifications
 to inform a user that an MSR was not handled.
 
+7.22 KVM_CAP_PPC_RPT_INVALIDATE
+-------------------------------
+
+:Capability: KVM_CAP_PPC_RPT_INVALIDATE
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel is capable of handling
+the H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+an IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is always enabled.
+
 8. Other capabilities.
 ======================
 
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index d32ec9ae73bd..0f1c5fa6e8ce 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
+			 unsigned long type, unsigned long pg_sizes,
+			 unsigned long start, unsigned long end);
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
 			  u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index d5821834dba9..fbf3b5b45fe9 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -124,8 +124,19 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 
 #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
+void do_h_rpt_invalidate(unsigned long pid, unsigned long lpid,
+			 unsigned long type, unsigned long page_size,
+			 unsigned long psize, unsigned long start,
+			 unsigned long end);
 #else
 static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
+static inline void do_h_rpt_invalidate(unsigned long pid,
+				       unsigned long lpid,
+				       unsigned long type,
+				       unsigned long page_size,
+				       unsigned long psize,
+				       unsigned long start,
+				       unsigned long end) { }
 #endif
 
 extern void switch_cop(struct mm_struct *next);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6f612d240392..802cb77c39cc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -904,6 +904,64 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 	return yield_count;
 }
 
+static void do_h_rpt_invalidate_prs(unsigned long pid, unsigned long lpid,
+				    unsigned long type, unsigned long pg_sizes,
+				    unsigned long start, unsigned long end)
+{
+	unsigned long psize;
+
+	if (pg_sizes & H_RPTI_PAGE_64K) {
+		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
+		do_h_rpt_invalidate(pid, lpid, type, (1UL << 16), psize,
+				    start, end);
+	}
+
+	if (pg_sizes & H_RPTI_PAGE_2M) {
+		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
+		do_h_rpt_invalidate(pid, lpid, type, (1UL << 21), psize,
+				    start, end);
+	}
+
+	if (pg_sizes & H_RPTI_PAGE_1G) {
+		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
+		do_h_rpt_invalidate(pid, lpid, type, (1UL << 30), psize,
+				    start, end);
+	}
+
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+				    unsigned long pid, unsigned long target,
+				    unsigned long type, unsigned long pg_sizes,
+				    unsigned long start, unsigned long end)
+{
+	if (!kvm_is_radix(vcpu->kvm))
+		return H_UNSUPPORTED;
+
+	if (kvmhv_on_pseries())
+		return H_UNSUPPORTED;
+
+	if (end < start)
+		return H_P5;
+
+	if (type & H_RPTI_TYPE_NESTED) {
+		if (!nesting_enabled(vcpu->kvm))
+			return H_FUNCTION;
+
+		/* Support only cores as target */
+		if (target != H_RPTI_TARGET_CMMU)
+			return H_P2;
+
+		return kvmhv_h_rpti_nested(vcpu, pid,
+					   (type & ~H_RPTI_TYPE_NESTED),
+					    pg_sizes, start, end);
+	}
+
+	do_h_rpt_invalidate_prs(pid, vcpu->kvm->arch.lpid, type, pg_sizes,
+				start, end);
+	return H_SUCCESS;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -1112,6 +1170,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		 */
 		ret = kvmppc_h_svm_init_abort(vcpu->kvm);
 		break;
+	case H_RPT_INVALIDATE:
+		ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+					      kvmppc_get_gpr(vcpu, 5),
+					      kvmppc_get_gpr(vcpu, 6),
+					      kvmppc_get_gpr(vcpu, 7),
+					      kvmppc_get_gpr(vcpu, 8),
+					      kvmppc_get_gpr(vcpu, 9));
+		break;
 
 	default:
 		return RESUME_HOST;
@@ -1158,6 +1224,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
 	case H_XIRR_X:
 #endif
 	case H_PAGE_INIT:
+	case H_RPT_INVALIDATE:
 		return 1;
 	}
 
@@ -1573,6 +1640,30 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		if (!xics_on_xive())
 			kvmppc_xics_rm_complete(vcpu, 0);
 		break;
+	case BOOK3S_INTERRUPT_SYSCALL:
+	{
+		unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+		if (req != H_RPT_INVALIDATE) {
+			r = RESUME_HOST;
+			break;
+		}
+
+		/*
+		 * The H_RPT_INVALIDATE hcalls issued by a nested
+		 * guest for process-scoped invalidations when
+		 * GTSE=0 are handled here.
+		 */
+		do_h_rpt_invalidate_prs(kvmppc_get_gpr(vcpu, 4),
+					vcpu->arch.nested->shadow_lpid,
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6),
+					kvmppc_get_gpr(vcpu, 7),
+					kvmppc_get_gpr(vcpu, 8));
+		kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+		r = RESUME_GUEST;
+		break;
+	}
 	default:
 		r = RESUME_HOST;
 		break;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 33b58549a9aa..40ed4eb80adb 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1149,6 +1149,102 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
 	return H_SUCCESS;
 }
 
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long page_size,
+					 unsigned long ap,
+					 unsigned long start,
+					 unsigned long end)
+{
+	unsigned long addr = start;
+	int ret;
+
+	do {
+		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+						   get_epn(addr));
+		if (ret)
+			return ret;
+		addr += page_size;
+	} while (addr < end);
+
+	return ret;
+}
+
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+					 unsigned long lpid)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (gp) {
+		kvmhv_emulate_tlbie_lpid(vcpu, gp, RIC_FLUSH_ALL);
+		kvmhv_put_nested(gp);
+	}
+	return H_SUCCESS;
+}
+
+long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
+			 unsigned long type, unsigned long pg_sizes,
+			 unsigned long start, unsigned long end)
+{
+	struct kvm_nested_guest *gp;
+	long ret;
+	unsigned long psize, ap;
+
+	/*
+	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
+	 *
+	 * However, nested KVM issues an L2 lpid flush call when creating
+	 * partition table entries for L2. This happens even before the
+	 * corresponding shadow lpid is created in HV which happens in
+	 * H_ENTER_NESTED call. Since we can't differentiate this case from
+	 * the invalid case, we ignore such flush requests and return success.
+	 */
+	gp = kvmhv_find_nested(vcpu->kvm, lpid);
+	if (!gp)
+		return H_SUCCESS;
+
+	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+		return do_tlb_invalidate_nested_all(vcpu, lpid);
+
+	if ((type & H_RPTI_TYPE_TLB) == H_RPTI_TYPE_TLB) {
+		if (pg_sizes & H_RPTI_PAGE_64K) {
+			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
+			ap = mmu_get_ap(psize);
+
+			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
+							   (1UL << 16),
+							   ap, start, end);
+			if (ret)
+				return H_P4;
+		}
+
+		if (pg_sizes & H_RPTI_PAGE_2M) {
+			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
+			ap = mmu_get_ap(psize);
+
+			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
+							   (1UL << 21),
+							   ap, start, end);
+			if (ret)
+				return H_P4;
+		}
+
+		if (pg_sizes & H_RPTI_PAGE_1G) {
+			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
+			ap = mmu_get_ap(psize);
+
+			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
+							   (1UL << 30),
+							   ap, start, end);
+			if (ret)
+				return H_P4;
+		}
+	}
+	return H_SUCCESS;
+}
+
 /* Used to convert a nested guest real address to a L1 guest real address */
 static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
 				       struct kvm_nested_guest *gp,
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index cf52d26f49cd..5388cd4a206a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -678,6 +678,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = hv_enabled && kvmppc_hv_ops->enable_svm &&
 			!kvmppc_hv_ops->enable_svm(NULL);
 		break;
+	case KVM_CAP_PPC_RPT_INVALIDATE:
+		r = 1;
+		break;
 #endif
 	default:
 		r = 0;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 097402435303..4f746d34b420 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1400,4 +1400,29 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 	}
 }
 EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+
+/*
+ * Process-scoped invalidations for a given LPID.
+ */
+void do_h_rpt_invalidate(unsigned long pid, unsigned long lpid,
+			 unsigned long type, unsigned long page_size,
+			 unsigned long psize, unsigned long start,
+			 unsigned long end)
+{
+	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		return;
+	}
+
+	if (type & H_RPTI_TYPE_PWC)
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+
+	if (!start && end == -1) /* PID */
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+	else /* EA */
+		_tlbie_va_range_lpid(start, end, pid, lpid, page_size,
+				     psize, false);
+}
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate);
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 374c67875cdb..6fd530fae452 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1058,6 +1058,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 #define KVM_CAP_SYS_HYPERV_CPUID 191
 #define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_PPC_RPT_INVALIDATE 193
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.26.2



* [PATCH v4 3/3] KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM
  2021-02-15  6:47 ` Bharata B Rao
@ 2021-02-15  6:47   ` Bharata B Rao
  -1 siblings, 0 replies; 21+ messages in thread
From: Bharata B Rao @ 2021-02-15  6:35 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david

In the nested KVM case, replace H_TLB_INVALIDATE with the new hcall
H_RPT_INVALIDATE when it is available. The availability of this hcall
is determined from the "hcall-rpt-invalidate" string in the
ibm,hypertas-functions DT property.
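
The pattern used throughout this patch is summarized below (a condensed
sketch of the kvmhv_flush_lpid() hunk; FW_FEATURE_RPT_INVALIDATE is
assumed to be set when "hcall-rpt-invalidate" is found in
ibm,hypertas-functions):

	if (firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
		/* flush TLB, PWC and the process table for this LPID */
		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
					    H_RPTI_TYPE_NESTED |
					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					    H_RPTI_TYPE_PAT,
					    H_RPTI_PAGE_ALL, 0, -1UL);
	else	/* fall back to the older hcall */
		rc = plpar_hcall_norets(H_TLB_INVALIDATE,
					H_TLBIE_P1_ENC(2, 0, 1),
					lpid, TLBIEL_INVAL_SET_LPID);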

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 27 +++++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv_nested.c    | 12 ++++++++++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..7ea5459022cb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
 #include <asm/pte-walk.h>
 #include <asm/ultravisor.h>
 #include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
 
 /*
  * Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	}
 
 	psi = shift_to_mmu_psize(pshift);
-	rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
-				lpid, rb);
+
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+					lpid, rb);
+	} else {
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB,
+					    psize_to_rpti_pgsize(psi),
+					    addr, addr + psize);
+	}
+
 	if (rc)
 		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+					    0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 40ed4eb80adb..0ebddb615684 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pte-walk.h>
 #include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
 
 static struct patb_entry *pseries_partition_tb;
 
@@ -402,8 +403,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					    H_RPTI_TYPE_PAT,
+					    H_RPTI_PAGE_ALL, 0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 }
-- 
2.26.2


* Re: [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests
  2021-02-15  6:47   ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests Bharata B Rao
@ 2021-02-15 13:10     ` kernel test robot
  -1 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2021-02-15 13:10 UTC (permalink / raw)
  To: Bharata B Rao, kvm-ppc, linuxppc-dev
  Cc: kbuild-all, farosas, npiggin, Bharata B Rao, clang-built-linux,
	aneesh.kumar, david

[-- Attachment #1: Type: text/plain, Size: 3274 bytes --]

Hi Bharata,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v5.11]
[cannot apply to powerpc/next next-20210212]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Bharata-B-Rao/Support-for-H_RPT_INVALIDATE-in-PowerPC-KVM/20210215-143815
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: powerpc64-randconfig-r005-20210215 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project c9439ca36342fb6013187d0a69aef92736951476)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc64 cross compiling tool for clang build
        # apt-get install binutils-powerpc64-linux-gnu
        # https://github.com/0day-ci/linux/commit/2a2c1320dc2bc67ec962721c39e7639cc1abfa9d
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Bharata-B-Rao/Support-for-H_RPT_INVALIDATE-in-PowerPC-KVM/20210215-143815
        git checkout 2a2c1320dc2bc67ec962721c39e7639cc1abfa9d
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/book3s64/radix_tlb.c:399:20: error: unused function '_tlbie_pid_lpid' [-Werror,-Wunused-function]
   static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
                      ^
>> arch/powerpc/mm/book3s64/radix_tlb.c:643:20: error: unused function '_tlbie_va_range_lpid' [-Werror,-Wunused-function]
   static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
                      ^
   2 errors generated.


vim +/_tlbie_pid_lpid +399 arch/powerpc/mm/book3s64/radix_tlb.c

   398	
 > 399	static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
   400					   unsigned long ric)
   401	{
   402		asm volatile("ptesync" : : : "memory");
   403	
   404		/*
    405		 * Workaround the fact that the "ric" argument to __tlbie_pid_lpid
    406		 * must be a compile-time constant to match the "i" constraint
   407		 * in the asm statement.
   408		 */
   409		switch (ric) {
   410		case RIC_FLUSH_TLB:
   411			__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
   412			fixup_tlbie_pid_lpid(pid, lpid);
   413			break;
   414		case RIC_FLUSH_PWC:
   415			__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
   416			break;
   417		case RIC_FLUSH_ALL:
   418		default:
   419			__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
   420			fixup_tlbie_pid_lpid(pid, lpid);
   421		}
   422		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
   423	}
   424	struct tlbiel_pid {
   425		unsigned long pid;
   426		unsigned long ric;
   427	};
   428	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 25350 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests
  2021-02-15  6:47   ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from gu Bharata B Rao
@ 2021-02-17  0:24     ` David Gibson
  -1 siblings, 0 replies; 21+ messages in thread
From: David Gibson @ 2021-02-17  0:24 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: farosas, aneesh.kumar, npiggin, kvm-ppc, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 8651 bytes --]

On Mon, Feb 15, 2021 at 12:05:40PM +0530, Bharata B Rao wrote:
> The H_RPT_INVALIDATE hcall needs to perform process-scoped tlbie
> invalidations of L1 and nested guests from L0. This needs the RS register
> of the TLBIE instruction to contain both the PID and the LPID. Introduce
> primitives that execute the tlbie instruction with both PID
> and LPID set, in preparation for the H_RPT_INVALIDATE hcall.
> 
> While we are here, move the RIC_FLUSH definitions to a header file
> and introduce the helper rpti_pgsize_to_psize() that will be needed
> by the upcoming hcall.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> ---
>  .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
>  arch/powerpc/mm/book3s64/radix_tlb.c          | 122 +++++++++++++++++-
>  2 files changed, 136 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> index 94439e0cefc9..aace7e9b2397 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -4,6 +4,10 @@
>  
>  #include <asm/hvcall.h>
>  
> +#define RIC_FLUSH_TLB 0
> +#define RIC_FLUSH_PWC 1
> +#define RIC_FLUSH_ALL 2
> +
>  struct vm_area_struct;
>  struct mm_struct;
>  struct mmu_gather;
> @@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
>  	return H_RPTI_PAGE_ALL;
>  }
>  
> +static inline int rpti_pgsize_to_psize(unsigned long page_size)
> +{
> +	if (page_size == H_RPTI_PAGE_4K)
> +		return MMU_PAGE_4K;
> +	if (page_size == H_RPTI_PAGE_64K)
> +		return MMU_PAGE_64K;
> +	if (page_size == H_RPTI_PAGE_2M)
> +		return MMU_PAGE_2M;
> +	if (page_size == H_RPTI_PAGE_1G)
> +		return MMU_PAGE_1G;
> +	else
> +		return MMU_PAGE_64K; /* Default */
> +}

Would it make sense to put the H_RPTI_PAGE_ tags into the
mmu_psize_defs table and scan that here, rather than open-coding the
conversion?
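
For instance, a minimal sketch of that table-driven lookup - assuming a
new h_rpt_pgsize field (hypothetical here) were added to struct
mmu_psize_def and filled with the matching H_RPTI_PAGE_* value for each
supported size:

static inline int rpti_pgsize_to_psize(unsigned long page_size)
{
	int psize;

	/* Unsupported entries would carry h_rpt_pgsize == 0 and never match */
	for (psize = 0; psize < MMU_PAGE_COUNT; psize++)
		if (mmu_psize_defs[psize].h_rpt_pgsize == page_size)
			return psize;

	return MMU_PAGE_64K;	/* default, as in the original */
}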

> +
>  static inline int mmu_get_ap(int psize)
>  {
>  	return mmu_psize_defs[psize].ap;
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
> index fb66d154b26c..097402435303 100644
> --- a/arch/powerpc/mm/book3s64/radix_tlb.c
> +++ b/arch/powerpc/mm/book3s64/radix_tlb.c
> @@ -18,10 +18,6 @@
>  #include <asm/cputhreads.h>
>  #include <asm/plpar_wrappers.h>
>  
> -#define RIC_FLUSH_TLB 0
> -#define RIC_FLUSH_PWC 1
> -#define RIC_FLUSH_ALL 2
> -
>  /*
>   * tlbiel instruction for radix, set invalidation
>   * i.e., r=1 and is=01 or is=10 or is=11
> @@ -128,6 +124,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
>  	trace_tlbie(0, 0, rb, rs, ric, prs, r);
>  }
>  
> +static __always_inline void __tlbie_pid_lpid(unsigned long pid,
> +					     unsigned long lpid,
> +					     unsigned long ric)
> +{
> +	unsigned long rb, rs, prs, r;
> +
> +	rb = PPC_BIT(53); /* IS = 1 */
> +	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
> +	prs = 1; /* process scoped */
> +	r = 1;   /* radix format */
> +
> +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
> +	trace_tlbie(0, 0, rb, rs, ric, prs, r);
> +}
>  static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
>  {
>  	unsigned long rb,rs,prs,r;
> @@ -188,6 +199,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
>  	trace_tlbie(0, 0, rb, rs, ric, prs, r);
>  }
>  
> +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
> +					    unsigned long lpid,
> +					    unsigned long ap, unsigned long ric)
> +{
> +	unsigned long rb, rs, prs, r;
> +
> +	rb = va & ~(PPC_BITMASK(52, 63));
> +	rb |= ap << PPC_BITLSHIFT(58);
> +	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
> +	prs = 1; /* process scoped */
> +	r = 1;   /* radix format */
> +
> +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
> +	trace_tlbie(0, 0, rb, rs, ric, prs, r);
> +}
> +
>  static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
>  					    unsigned long ap, unsigned long ric)
>  {
> @@ -233,6 +261,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
>  	}
>  }
>  
> +static inline void fixup_tlbie_va_range_lpid(unsigned long va,
> +					     unsigned long pid,
> +					     unsigned long lpid,
> +					     unsigned long ap)
> +{
> +	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
> +		asm volatile("ptesync" : : : "memory");
> +		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
> +	}
> +
> +	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
> +		asm volatile("ptesync" : : : "memory");
> +		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
> +	}
> +}
> +
>  static inline void fixup_tlbie_pid(unsigned long pid)
>  {
>  	/*
> @@ -252,6 +296,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
>  	}
>  }
>  
> +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
> +{
> +	/*
> +	 * We can use any address for the invalidation, pick one which is
> +	 * probably unused as an optimisation.
> +	 */
> +	unsigned long va = ((1UL << 52) - 1);
> +
> +	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
> +		asm volatile("ptesync" : : : "memory");
> +		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
> +	}
> +
> +	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
> +		asm volatile("ptesync" : : : "memory");
> +		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
> +				RIC_FLUSH_TLB);
> +	}
> +}
>  
>  static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
>  				       unsigned long ap)
> @@ -342,6 +405,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
>  	asm volatile("eieio; tlbsync; ptesync": : :"memory");
>  }
>  
> +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
> +				   unsigned long ric)
> +{
> +	asm volatile("ptesync" : : : "memory");
> +
> +	/*
> +	 * Work around the fact that the "ric" argument to __tlbie_pid
> +	 * must be a compile-time constant to match the "i" constraint
> +	 * in the asm statement.
> +	 */
> +	switch (ric) {
> +	case RIC_FLUSH_TLB:
> +		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
> +		fixup_tlbie_pid_lpid(pid, lpid);
> +		break;
> +	case RIC_FLUSH_PWC:
> +		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
> +		break;
> +	case RIC_FLUSH_ALL:
> +	default:
> +		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
> +		fixup_tlbie_pid_lpid(pid, lpid);
> +	}
> +	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
>  struct tlbiel_pid {
>  	unsigned long pid;
>  	unsigned long ric;
> @@ -467,6 +555,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
>  	fixup_tlbie_va_range(addr - page_size, pid, ap);
>  }
>  
> +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
> +					 unsigned long pid, unsigned long lpid,
> +					 unsigned long page_size,
> +					 unsigned long psize)
> +{
> +	unsigned long addr;
> +	unsigned long ap = mmu_get_ap(psize);
> +
> +	for (addr = start; addr < end; addr += page_size)
> +		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
> +
> +	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
> +}
> +
>  static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
>  				      unsigned long psize, unsigned long ric)
>  {
> @@ -547,6 +649,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
>  	asm volatile("eieio; tlbsync; ptesync": : :"memory");
>  }
>  
> +static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
> +					unsigned long pid, unsigned long lpid,
> +					unsigned long page_size,
> +					unsigned long psize, bool also_pwc)
> +{
> +	asm volatile("ptesync" : : : "memory");
> +	if (also_pwc)
> +		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
> +	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
> +	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
>  static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
>  				unsigned long start, unsigned long end,
>  				unsigned long pid, unsigned long page_size,

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/3] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  2021-02-15  6:47   ` Bharata B Rao
@ 2021-02-17  0:38     ` David Gibson
  -1 siblings, 0 replies; 21+ messages in thread
From: David Gibson @ 2021-02-17  0:38 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: farosas, aneesh.kumar, npiggin, kvm-ppc, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 14794 bytes --]

On Mon, Feb 15, 2021 at 12:05:41PM +0530, Bharata B Rao wrote:
> Implement the H_RPT_INVALIDATE hcall and add the KVM capability
> KVM_CAP_PPC_RPT_INVALIDATE to indicate support for it.
> 
> This hcall does two types of TLB invalidations:
> 
> 1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
>    This is currently not used in KVM, as GTSE is not usually
>    disabled in KVM.
> 2. Partition-scoped invalidations that an L1 hypervisor does on
>    behalf of an L2 guest. This replaces the uses of the existing
>    hcall H_TLB_INVALIDATE.
> 
> In order to handle process-scoped invalidations of L2, we
> intercept the nested exit handling code in L0, but only to handle
> the H_RPT_INVALIDATE hcall.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> ---
>  Documentation/virt/kvm/api.rst         | 17 +++++
>  arch/powerpc/include/asm/kvm_book3s.h  |  3 +
>  arch/powerpc/include/asm/mmu_context.h | 11 +++
>  arch/powerpc/kvm/book3s_hv.c           | 91 ++++++++++++++++++++++++
>  arch/powerpc/kvm/book3s_hv_nested.c    | 96 ++++++++++++++++++++++++++
>  arch/powerpc/kvm/powerpc.c             |  3 +
>  arch/powerpc/mm/book3s64/radix_tlb.c   | 25 +++++++
>  include/uapi/linux/kvm.h               |  1 +
>  8 files changed, 247 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 99ceb978c8b0..416c36aa35d4 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6038,6 +6038,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
>  can then handle to implement model specific MSR handling and/or user notifications
>  to inform a user that an MSR was not handled.
>  
> +7.22 KVM_CAP_PPC_RPT_INVALIDATE
> +------------------------------
> +
> +:Capability: KVM_CAP_PPC_RPT_INVALIDATE
> +:Architectures: ppc
> +:Type: vm
> +
> +This capability indicates that the kernel is capable of handling
> +the H_RPT_INVALIDATE hcall.
> +
> +In order to enable the use of H_RPT_INVALIDATE in the guest,
> +user space might have to advertise it for the guest. For example,
> +an IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
> +present in the "ibm,hypertas-functions" device-tree property.
> +
> +This capability is always enabled.

I guess that means it's always enabled when it's available - I'm
pretty sure it won't be enabled on POWER8 or on PR KVM.
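
A sketch of how the extension check might be narrowed to match - the
exact condition (hv_enabled plus radix_enabled()) is an assumption on
my part, not something the patch states:

	case KVM_CAP_PPC_RPT_INVALIDATE:
		r = hv_enabled && radix_enabled();
		break;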

> +
>  8. Other capabilities.
>  ======================
>  
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index d32ec9ae73bd..0f1c5fa6e8ce 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
>  void kvmhv_release_all_nested(struct kvm *kvm);
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>  long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +			 unsigned long type, unsigned long pg_sizes,
> +			 unsigned long start, unsigned long end);
>  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
>  			  u64 time_limit, unsigned long lpcr);
>  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index d5821834dba9..fbf3b5b45fe9 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -124,8 +124,19 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
>  
>  #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
>  extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
> +void do_h_rpt_invalidate(unsigned long pid, unsigned long lpid,
> +			 unsigned long type, unsigned long page_size,
> +			 unsigned long psize, unsigned long start,
> +			 unsigned long end);
>  #else
>  static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
> +static inline void do_h_rpt_invalidate(unsigned long pid,
> +				       unsigned long lpid,
> +				       unsigned long type,
> +				       unsigned long page_size,
> +				       unsigned long psize,
> +				       unsigned long start,
> +				       unsigned long end) { }
>  #endif
>  
>  extern void switch_cop(struct mm_struct *next);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6f612d240392..802cb77c39cc 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -904,6 +904,64 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
>  	return yield_count;
>  }
>  
> +static void do_h_rpt_invalidate_prs(unsigned long pid, unsigned long lpid,
> +				    unsigned long type, unsigned long pg_sizes,
> +				    unsigned long start, unsigned long end)
> +{
> +	unsigned long psize;
> +
> +	if (pg_sizes & H_RPTI_PAGE_64K) {
> +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 16), psize,
> +				    start, end);
> +	}
> +
> +	if (pg_sizes & H_RPTI_PAGE_2M) {
> +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 21), psize,
> +				    start, end);
> +	}
> +
> +	if (pg_sizes & H_RPTI_PAGE_1G) {
> +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 30), psize,
> +				    start, end);
> +	}

Hrm.  Here you're stepping through the hcall-defined pagesizes, then
mapping each one to the Linux internal page size defs.

It might be more elegant to step through the mmu_psize_defs table, and
conditionally perform an invalidate on that pagesize if the
corresponding bit in pg_sizes is set (as noted earlier you could
easily add the H_RPTI_PAGE bit to the table).  That way it's a direct
table lookup rather than a bunch of ifs or switches.
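
Concretely, a sketch of that shape - again assuming a hypothetical
h_rpt_pgsize member in struct mmu_psize_def carrying the H_RPTI_PAGE_*
bit for each entry:

static void do_h_rpt_invalidate_prs(unsigned long pid, unsigned long lpid,
				    unsigned long type, unsigned long pg_sizes,
				    unsigned long start, unsigned long end)
{
	struct mmu_psize_def *def;
	unsigned long psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		def = &mmu_psize_defs[psize];
		if (!(pg_sizes & def->h_rpt_pgsize))
			continue;
		/* 1UL << def->shift is the page size in bytes */
		do_h_rpt_invalidate(pid, lpid, type,
				    1UL << def->shift, psize,
				    start, end);
	}
}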

> +}
> +
> +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
> +				    unsigned long pid, unsigned long target,
> +				    unsigned long type, unsigned long pg_sizes,
> +				    unsigned long start, unsigned long end)
> +{
> +	if (!kvm_is_radix(vcpu->kvm))
> +		return H_UNSUPPORTED;
> +
> +	if (kvmhv_on_pseries())
> +		return H_UNSUPPORTED;

This doesn't seem quite right.  If you have multiply nested guests,
won't the L2 be issuing H_RPT_INVALIDATE hcalls to the L1 on behalf
of the L3?  The L1 would have to implement them by calling the L0, but
the L1 can't just reject them, no?

Likewise for the !H_RPTI_TYPE_NESTED case: on an L1 that happens to be
a nested guest itself, couldn't this case legitimately arise and need
to be handled?

> +
> +	if (end < start)
> +		return H_P5;
> +
> +	if (type & H_RPTI_TYPE_NESTED) {
> +		if (!nesting_enabled(vcpu->kvm))
> +			return H_FUNCTION;
> +
> +		/* Support only cores as target */
> +		if (target != H_RPTI_TARGET_CMMU)
> +			return H_P2;
> +
> +		return kvmhv_h_rpti_nested(vcpu, pid,
> +					   (type & ~H_RPTI_TYPE_NESTED),
> +					    pg_sizes, start, end);
> +	}
> +
> +	do_h_rpt_invalidate_prs(pid, vcpu->kvm->arch.lpid, type, pg_sizes,
> +				start, end);
> +	return H_SUCCESS;
> +}
> +
>  int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long req = kvmppc_get_gpr(vcpu, 3);
> @@ -1112,6 +1170,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  		 */
>  		ret = kvmppc_h_svm_init_abort(vcpu->kvm);
>  		break;
> +	case H_RPT_INVALIDATE:
> +		ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
> +					      kvmppc_get_gpr(vcpu, 5),
> +					      kvmppc_get_gpr(vcpu, 6),
> +					      kvmppc_get_gpr(vcpu, 7),
> +					      kvmppc_get_gpr(vcpu, 8),
> +					      kvmppc_get_gpr(vcpu, 9));
> +		break;
>  
>  	default:
>  		return RESUME_HOST;
> @@ -1158,6 +1224,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
>  	case H_XIRR_X:
>  #endif
>  	case H_PAGE_INIT:
> +	case H_RPT_INVALIDATE:
>  		return 1;
>  	}
>  
> @@ -1573,6 +1640,30 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
>  		if (!xics_on_xive())
>  			kvmppc_xics_rm_complete(vcpu, 0);
>  		break;
> +	case BOOK3S_INTERRUPT_SYSCALL:
> +	{
> +		unsigned long req = kvmppc_get_gpr(vcpu, 3);
> +
> +		if (req != H_RPT_INVALIDATE) {
> +			r = RESUME_HOST;
> +			break;
> +		}
> +
> +		/*
> +		 * The H_RPT_INVALIDATE hcalls issued by nested
> +		 * guest for process scoped invalidations when
> +		 * GTSE=0 are handled here.
> +		 */
> +		do_h_rpt_invalidate_prs(kvmppc_get_gpr(vcpu, 4),
> +					vcpu->arch.nested->shadow_lpid,
> +					kvmppc_get_gpr(vcpu, 5),
> +					kvmppc_get_gpr(vcpu, 6),
> +					kvmppc_get_gpr(vcpu, 7),
> +					kvmppc_get_gpr(vcpu, 8));
> +		kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
> +		r = RESUME_GUEST;
> +		break;
> +	}
>  	default:
>  		r = RESUME_HOST;
>  		break;
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 33b58549a9aa..40ed4eb80adb 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -1149,6 +1149,102 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
>  	return H_SUCCESS;
>  }
>  
> +static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
> +					 unsigned long lpid,
> +					 unsigned long page_size,
> +					 unsigned long ap,
> +					 unsigned long start,
> +					 unsigned long end)
> +{
> +	unsigned long addr = start;
> +	int ret;
> +
> +	do {
> +		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
> +						   get_epn(addr));
> +		if (ret)
> +			return ret;
> +		addr += page_size;
> +	} while (addr < end);
> +
> +	return ret;
> +}
> +
> +static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
> +					 unsigned long lpid)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +
> +	gp = kvmhv_get_nested(kvm, lpid, false);
> +	if (gp) {
> +		kvmhv_emulate_tlbie_lpid(vcpu, gp, RIC_FLUSH_ALL);
> +		kvmhv_put_nested(gp);
> +	}
> +	return H_SUCCESS;
> +}
> +
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +			 unsigned long type, unsigned long pg_sizes,
> +			 unsigned long start, unsigned long end)
> +{
> +	struct kvm_nested_guest *gp;
> +	long ret;
> +	unsigned long psize, ap;
> +
> +	/*
> +	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
> +	 *
> +	 * However, nested KVM issues an L2 lpid flush call when creating
> +	 * partition table entries for L2. This happens even before the
> +	 * corresponding shadow lpid is created in HV which happens in
> +	 * H_ENTER_NESTED call. Since we can't differentiate this case from
> +	 * the invalid case, we ignore such flush requests and return success.
> +	 */
> +	gp = kvmhv_find_nested(vcpu->kvm, lpid);
> +	if (!gp)
> +		return H_SUCCESS;
> +
> +	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
> +		return do_tlb_invalidate_nested_all(vcpu, lpid);
> +
> +	if ((type & H_RPTI_TYPE_TLB) == H_RPTI_TYPE_TLB) {
> +		if (pg_sizes & H_RPTI_PAGE_64K) {
> +			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> +			ap = mmu_get_ap(psize);
> +
> +			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +							   (1UL << 16),
> +							   ap, start, end);
> +			if (ret)
> +				return H_P4;
> +		}
> +
> +		if (pg_sizes & H_RPTI_PAGE_2M) {
> +			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> +			ap = mmu_get_ap(psize);
> +
> +			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +							   (1UL << 21),
> +							   ap, start, end);
> +			if (ret)
> +				return H_P4;
> +		}
> +
> +		if (pg_sizes & H_RPTI_PAGE_1G) {
> +			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> +			ap = mmu_get_ap(psize);
> +
> +			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +							   (1UL << 30),
> +							   ap, start, end);
> +			if (ret)
> +				return H_P4;
> +		}

Again it might be more elegant to step through the pagesizes from the
mmu_psize_defs side, rather than from the pg_sizes side.
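
That is, the same loop shape as suggested above for book3s_hv.c, with
the hypothetical h_rpt_pgsize field assumed once more (def being a
struct mmu_psize_def pointer and psize an unsigned long, as in the
earlier sketch):

	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		def = &mmu_psize_defs[psize];
		if (!(pg_sizes & def->h_rpt_pgsize))
			continue;
		ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
						   1UL << def->shift,
						   mmu_get_ap(psize),
						   start, end);
		if (ret)
			return H_P4;
	}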

> +	}
> +	return H_SUCCESS;
> +}
> +
>  /* Used to convert a nested guest real address to a L1 guest real address */
>  static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
>  				       struct kvm_nested_guest *gp,
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index cf52d26f49cd..5388cd4a206a 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -678,6 +678,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  		r = hv_enabled && kvmppc_hv_ops->enable_svm &&
>  			!kvmppc_hv_ops->enable_svm(NULL);
>  		break;
> +	case KVM_CAP_PPC_RPT_INVALIDATE:
> +		r = 1;
> +		break;
>  #endif
>  	default:
>  		r = 0;
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
> index 097402435303..4f746d34b420 100644
> --- a/arch/powerpc/mm/book3s64/radix_tlb.c
> +++ b/arch/powerpc/mm/book3s64/radix_tlb.c
> @@ -1400,4 +1400,29 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
>  	}
>  }
>  EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
> +
> +/*
> + * Process-scoped invalidations for a given LPID.
> + */
> +void do_h_rpt_invalidate(unsigned long pid, unsigned long lpid,
> +			 unsigned long type, unsigned long page_size,
> +			 unsigned long psize, unsigned long start,
> +			 unsigned long end)
> +{
> +	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
> +		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
> +		return;
> +	}
> +
> +	if (type & H_RPTI_TYPE_PWC)
> +		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
> +
> +	if (!start && end == -1) /* PID */
> +		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
> +	else /* EA */
> +		_tlbie_va_range_lpid(start, end, pid, lpid, page_size,
> +				     psize, false);
> +}
> +EXPORT_SYMBOL_GPL(do_h_rpt_invalidate);
> +
>  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 374c67875cdb..6fd530fae452 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1058,6 +1058,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
>  #define KVM_CAP_SYS_HYPERV_CPUID 191
>  #define KVM_CAP_DIRTY_LOG_RING 192
> +#define KVM_CAP_PPC_RPT_INVALIDATE 193
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests
  2021-02-17  0:24     ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations fro David Gibson
@ 2021-02-22  6:37       ` Bharata B Rao
  -1 siblings, 0 replies; 21+ messages in thread
From: Bharata B Rao @ 2021-02-22  6:25 UTC (permalink / raw)
  To: David Gibson; +Cc: farosas, aneesh.kumar, npiggin, kvm-ppc, linuxppc-dev

On Wed, Feb 17, 2021 at 11:24:48AM +1100, David Gibson wrote:
> On Mon, Feb 15, 2021 at 12:05:40PM +0530, Bharata B Rao wrote:
> > The H_RPT_INVALIDATE hcall needs to perform process-scoped tlbie
> > invalidations of L1 and nested guests from L0. This needs the RS register
> > of the TLBIE instruction to contain both the PID and the LPID. Introduce
> > primitives that execute the tlbie instruction with both PID
> > and LPID set, in preparation for the H_RPT_INVALIDATE hcall.
> > 
> > While we are here, move the RIC_FLUSH definitions to a header file
> > and introduce the helper rpti_pgsize_to_psize() that will be needed
> > by the upcoming hcall.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> > ---
> >  .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
> >  arch/powerpc/mm/book3s64/radix_tlb.c          | 122 +++++++++++++++++-
> >  2 files changed, 136 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> > index 94439e0cefc9..aace7e9b2397 100644
> > --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> > +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> > @@ -4,6 +4,10 @@
> >  
> >  #include <asm/hvcall.h>
> >  
> > +#define RIC_FLUSH_TLB 0
> > +#define RIC_FLUSH_PWC 1
> > +#define RIC_FLUSH_ALL 2
> > +
> >  struct vm_area_struct;
> >  struct mm_struct;
> >  struct mmu_gather;
> > @@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
> >  	return H_RPTI_PAGE_ALL;
> >  }
> >  
> > +static inline int rpti_pgsize_to_psize(unsigned long page_size)
> > +{
> > +	if (page_size == H_RPTI_PAGE_4K)
> > +		return MMU_PAGE_4K;
> > +	if (page_size == H_RPTI_PAGE_64K)
> > +		return MMU_PAGE_64K;
> > +	if (page_size == H_RPTI_PAGE_2M)
> > +		return MMU_PAGE_2M;
> > +	if (page_size == H_RPTI_PAGE_1G)
> > +		return MMU_PAGE_1G;
> > +	else
> > +		return MMU_PAGE_64K; /* Default */
> > +}
> 
> Would it make sense to put the H_RPT_PAGE_ tags into the
> mmu_psize_defs table and scan that here, rather than open coding the
> conversion?

I will give this a try and see how it looks.
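
For reference, a minimal sketch of what that table-driven lookup
could look like (the h_rpt_pgsize field is hypothetical here; it
would be the new member added to struct mmu_psize_def):

	/*
	 * Sketch only: assumes each mmu_psize_defs[] entry gains a
	 * h_rpt_pgsize field holding its H_RPTI_PAGE_* encoding, so
	 * the conversion becomes a simple table scan.
	 */
	static inline int rpti_pgsize_to_psize(unsigned long page_size)
	{
		int psize;

		for (psize = 0; psize < MMU_PAGE_COUNT; psize++)
			if (mmu_psize_defs[psize].h_rpt_pgsize == page_size)
				return psize;

		return MMU_PAGE_64K;	/* Default */
	}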

Otherwise, do the changes in the patch look right? They are mainly
about introducing primitives that set both PID and LPID for the
tlbie instruction.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/3] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  2021-02-17  0:38     ` David Gibson
@ 2021-02-22  6:58       ` Bharata B Rao
  -1 siblings, 0 replies; 21+ messages in thread
From: Bharata B Rao @ 2021-02-22  6:46 UTC (permalink / raw)
  To: David Gibson; +Cc: farosas, aneesh.kumar, npiggin, kvm-ppc, linuxppc-dev

On Wed, Feb 17, 2021 at 11:38:07AM +1100, David Gibson wrote:
> On Mon, Feb 15, 2021 at 12:05:41PM +0530, Bharata B Rao wrote:
> > Implement H_RPT_INVALIDATE hcall and add KVM capability
> > KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.
> > 
> > This hcall does two types of TLB invalidations:
> > 
> > 1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
> >    This is currently not used in KVM as GTSE is not usually
> >    disabled in KVM.
> > 2. Partition-scoped invalidations that an L1 hypervisor does on
> >    behalf of an L2 guest. This replaces the uses of the existing
> >    hcall H_TLB_INVALIDATE.
> > 
> > In order to handle process scoped invalidations of L2, we
> > intercept the nested exit handling code in L0 only to handle
> > the H_RPT_INVALIDATE hcall.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> > ---
> >  Documentation/virt/kvm/api.rst         | 17 +++++
> >  arch/powerpc/include/asm/kvm_book3s.h  |  3 +
> >  arch/powerpc/include/asm/mmu_context.h | 11 +++
> >  arch/powerpc/kvm/book3s_hv.c           | 91 ++++++++++++++++++++++++
> >  arch/powerpc/kvm/book3s_hv_nested.c    | 96 ++++++++++++++++++++++++++
> >  arch/powerpc/kvm/powerpc.c             |  3 +
> >  arch/powerpc/mm/book3s64/radix_tlb.c   | 25 +++++++
> >  include/uapi/linux/kvm.h               |  1 +
> >  8 files changed, 247 insertions(+)
> > 
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index 99ceb978c8b0..416c36aa35d4 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -6038,6 +6038,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
> >  can then handle to implement model specific MSR handling and/or user notifications
> >  to inform a user that an MSR was not handled.
> >  
> > +7.22 KVM_CAP_PPC_RPT_INVALIDATE
> > +------------------------------
> > +
> > +:Capability: KVM_CAP_PPC_RPT_INVALIDATE
> > +:Architectures: ppc
> > +:Type: vm
> > +
> > +This capability indicates that the kernel is capable of handling
> > +H_RPT_INVALIDATE hcall.
> > +
> > +In order to enable the use of H_RPT_INVALIDATE in the guest,
> > +user space might have to advertise it for the guest. For example,
> > +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
> > +present in the "ibm,hypertas-functions" device-tree property.
> > +
> > +This capability is always enabled.
> 
> I guess that means it's always enabled when it's available - I'm
> pretty sure it won't be enabled on POWER8 or on PR KVM.

Correct, I will reword this and restrict it to POWER9, radix, etc.
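
For illustration, the reworded capability check in
kvm_vm_ioctl_check_extension() might look something like this
(the exact gating condition is an assumption, not the final code):

	case KVM_CAP_PPC_RPT_INVALIDATE:
		/*
		 * Sketch: advertise the hcall only where it can
		 * actually be serviced, i.e. on radix hosts.
		 */
		r = radix_enabled();
		break;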

> 
> > +
> >  8. Other capabilities.
> >  ======================
> >  
> > diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> > index d32ec9ae73bd..0f1c5fa6e8ce 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s.h
> > @@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
> >  void kvmhv_release_all_nested(struct kvm *kvm);
> >  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
> >  long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
> > +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> > +			 unsigned long type, unsigned long pg_sizes,
> > +			 unsigned long start, unsigned long end);
> >  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
> >  			  u64 time_limit, unsigned long lpcr);
> >  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > index d5821834dba9..fbf3b5b45fe9 100644
> > --- a/arch/powerpc/include/asm/mmu_context.h
> > +++ b/arch/powerpc/include/asm/mmu_context.h
> > @@ -124,8 +124,19 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
> >  
> >  #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
> >  extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
> > +void do_h_rpt_invalidate(unsigned long pid, unsigned long lpid,
> > +			 unsigned long type, unsigned long page_size,
> > +			 unsigned long psize, unsigned long start,
> > +			 unsigned long end);
> >  #else
> >  static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
> > +static inline void do_h_rpt_invalidate(unsigned long pid,
> > +				       unsigned long lpid,
> > +				       unsigned long type,
> > +				       unsigned long page_size,
> > +				       unsigned long psize,
> > +				       unsigned long start,
> > +				       unsigned long end) { }
> >  #endif
> >  
> >  extern void switch_cop(struct mm_struct *next);
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index 6f612d240392..802cb77c39cc 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -904,6 +904,64 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
> >  	return yield_count;
> >  }
> >  
> > +static void do_h_rpt_invalidate_prs(unsigned long pid, unsigned long lpid,
> > +				    unsigned long type, unsigned long pg_sizes,
> > +				    unsigned long start, unsigned long end)
> > +{
> > +	unsigned long psize;
> > +
> > +	if (pg_sizes & H_RPTI_PAGE_64K) {
> > +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> > +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 16), psize,
> > +				    start, end);
> > +	}
> > +
> > +	if (pg_sizes & H_RPTI_PAGE_2M) {
> > +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> > +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 21), psize,
> > +				    start, end);
> > +	}
> > +
> > +	if (pg_sizes & H_RPTI_PAGE_1G) {
> > +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> > +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 30), psize,
> > +				    start, end);
> > +	}
> 
> Hrm.  Here you're stepping through the hcall-defined page sizes, then
> mapping each one to the Linux internal page size defs.
> 
> It might be more elegant to step through the mmu_psize_defs table, and
> conditionally perform an invalidate for that page size if the
> corresponding bit in pg_sizes is set (as noted earlier you could
> easily add the H_RPTI_PAGE bit to the table).  That way it's a direct
> table lookup rather than a bunch of ifs or switches.

Yes, let me give this a try.
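
A rough sketch of that loop, again assuming a hypothetical
h_rpt_pgsize field in struct mmu_psize_def:

	static void do_h_rpt_invalidate_prs(unsigned long pid, unsigned long lpid,
					    unsigned long type, unsigned long pg_sizes,
					    unsigned long start, unsigned long end)
	{
		unsigned long psize;

		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
			struct mmu_psize_def *def = &mmu_psize_defs[psize];

			/* Skip page sizes the caller did not request */
			if (!(pg_sizes & def->h_rpt_pgsize))
				continue;

			/* def->shift is log2 of this entry's page size */
			do_h_rpt_invalidate(pid, lpid, type,
					    1UL << def->shift, psize,
					    start, end);
		}
	}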

> 
> > +}
> > +
> > +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
> > +				    unsigned long pid, unsigned long target,
> > +				    unsigned long type, unsigned long pg_sizes,
> > +				    unsigned long start, unsigned long end)
> > +{
> > +	if (!kvm_is_radix(vcpu->kvm))
> > +		return H_UNSUPPORTED;
> > +
> > +	if (kvmhv_on_pseries())
> > +		return H_UNSUPPORTED;
> 
> This doesn't seem quite right.  If you have multiply nested guests,
> won't the L2 be issuing H_RPT_INVALIDATE hcalls to the L1 on behalf
> of the L3?  The L1 would have to implement them by calling the L0, but
> the L1 can't just reject them, no?
> 
> Likewise for the !H_RPTI_TYPE_NESTED case, but on what happens to be a
> nested guest in any case, couldn't this case legitimately arise and
> need to be handled?

The approach is to handle this hcall on behalf of all the nested
guests in L0 only. I am intercepting the nested exit path precisely
for this, as shown in the hunk below.

> > @@ -1573,6 +1640,30 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
> >  		if (!xics_on_xive())
> >  			kvmppc_xics_rm_complete(vcpu, 0);
> >  		break;
> > +	case BOOK3S_INTERRUPT_SYSCALL:
> > +	{
> > +		unsigned long req = kvmppc_get_gpr(vcpu, 3);
> > +
> > +		if (req != H_RPT_INVALIDATE) {
> > +			r = RESUME_HOST;
> > +			break;
> > +		}
> > +
> > +		/*
> > +		 * The H_RPT_INVALIDATE hcalls issued by nested
> > +		 * guest for process scoped invalidations when
> > +		 * GTSE=0 are handled here.
> > +		 */
> > +		do_h_rpt_invalidate_prs(kvmppc_get_gpr(vcpu, 4),
> > +					vcpu->arch.nested->shadow_lpid,
> > +					kvmppc_get_gpr(vcpu, 5),
> > +					kvmppc_get_gpr(vcpu, 6),
> > +					kvmppc_get_gpr(vcpu, 7),
> > +					kvmppc_get_gpr(vcpu, 8));
> > +		kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
> > +		r = RESUME_GUEST;
> > +		break;
> > +	}
> >  	default:
> >  		r = RESUME_HOST;
> >  		break;

Thanks for your review.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/3] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  2021-02-22  6:58       ` Bharata B Rao
@ 2021-02-22 23:26         ` David Gibson
  -1 siblings, 0 replies; 21+ messages in thread
From: David Gibson @ 2021-02-22 23:26 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: farosas, aneesh.kumar, npiggin, kvm-ppc, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 9276 bytes --]

On Mon, Feb 22, 2021 at 12:16:08PM +0530, Bharata B Rao wrote:
> On Wed, Feb 17, 2021 at 11:38:07AM +1100, David Gibson wrote:
> > On Mon, Feb 15, 2021 at 12:05:41PM +0530, Bharata B Rao wrote:
> > > Implement H_RPT_INVALIDATE hcall and add KVM capability
> > > KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.
> > > 
> > > This hcall does two types of TLB invalidations:
> > > 
> > > 1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
> > >    This is currently not used in KVM as GTSE is not usually
> > >    disabled in KVM.
> > > 2. Partition-scoped invalidations that an L1 hypervisor does on
> > >    behalf of an L2 guest. This replaces the uses of the existing
> > >    hcall H_TLB_INVALIDATE.
> > > 
> > > In order to handle process scoped invalidations of L2, we
> > > intercept the nested exit handling code in L0 only to handle
> > > the H_RPT_INVALIDATE hcall.
> > > 
> > > Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> > > ---
> > >  Documentation/virt/kvm/api.rst         | 17 +++++
> > >  arch/powerpc/include/asm/kvm_book3s.h  |  3 +
> > >  arch/powerpc/include/asm/mmu_context.h | 11 +++
> > >  arch/powerpc/kvm/book3s_hv.c           | 91 ++++++++++++++++++++++++
> > >  arch/powerpc/kvm/book3s_hv_nested.c    | 96 ++++++++++++++++++++++++++
> > >  arch/powerpc/kvm/powerpc.c             |  3 +
> > >  arch/powerpc/mm/book3s64/radix_tlb.c   | 25 +++++++
> > >  include/uapi/linux/kvm.h               |  1 +
> > >  8 files changed, 247 insertions(+)
> > > 
> > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > index 99ceb978c8b0..416c36aa35d4 100644
> > > --- a/Documentation/virt/kvm/api.rst
> > > +++ b/Documentation/virt/kvm/api.rst
> > > @@ -6038,6 +6038,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
> > >  can then handle to implement model specific MSR handling and/or user notifications
> > >  to inform a user that an MSR was not handled.
> > >  
> > > +7.22 KVM_CAP_PPC_RPT_INVALIDATE
> > > +------------------------------
> > > +
> > > +:Capability: KVM_CAP_PPC_RPT_INVALIDATE
> > > +:Architectures: ppc
> > > +:Type: vm
> > > +
> > > +This capability indicates that the kernel is capable of handling
> > > +H_RPT_INVALIDATE hcall.
> > > +
> > > +In order to enable the use of H_RPT_INVALIDATE in the guest,
> > > +user space might have to advertise it for the guest. For example,
> > > +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
> > > +present in the "ibm,hypertas-functions" device-tree property.
> > > +
> > > +This capability is always enabled.
> > 
> > I guess that means it's always enabled when it's available - I'm
> > pretty sure it won't be enabled on POWER8 or on PR KVM.
> 
> Correct, I will reword this and restrict it to POWER9, radix, etc.
> 
> > 
> > > +
> > >  8. Other capabilities.
> > >  ======================
> > >  
> > > diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> > > index d32ec9ae73bd..0f1c5fa6e8ce 100644
> > > --- a/arch/powerpc/include/asm/kvm_book3s.h
> > > +++ b/arch/powerpc/include/asm/kvm_book3s.h
> > > @@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
> > >  void kvmhv_release_all_nested(struct kvm *kvm);
> > >  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
> > >  long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
> > > +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> > > +			 unsigned long type, unsigned long pg_sizes,
> > > +			 unsigned long start, unsigned long end);
> > >  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
> > >  			  u64 time_limit, unsigned long lpcr);
> > >  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> > > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > > index d5821834dba9..fbf3b5b45fe9 100644
> > > --- a/arch/powerpc/include/asm/mmu_context.h
> > > +++ b/arch/powerpc/include/asm/mmu_context.h
> > > @@ -124,8 +124,19 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
> > >  
> > >  #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
> > >  extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
> > > +void do_h_rpt_invalidate(unsigned long pid, unsigned long lpid,
> > > +			 unsigned long type, unsigned long page_size,
> > > +			 unsigned long psize, unsigned long start,
> > > +			 unsigned long end);
> > >  #else
> > >  static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
> > > +static inline void do_h_rpt_invalidate(unsigned long pid,
> > > +				       unsigned long lpid,
> > > +				       unsigned long type,
> > > +				       unsigned long page_size,
> > > +				       unsigned long psize,
> > > +				       unsigned long start,
> > > +				       unsigned long end) { }
> > >  #endif
> > >  
> > >  extern void switch_cop(struct mm_struct *next);
> > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > > index 6f612d240392..802cb77c39cc 100644
> > > --- a/arch/powerpc/kvm/book3s_hv.c
> > > +++ b/arch/powerpc/kvm/book3s_hv.c
> > > @@ -904,6 +904,64 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
> > >  	return yield_count;
> > >  }
> > >  
> > > +static void do_h_rpt_invalidate_prs(unsigned long pid, unsigned long lpid,
> > > +				    unsigned long type, unsigned long pg_sizes,
> > > +				    unsigned long start, unsigned long end)
> > > +{
> > > +	unsigned long psize;
> > > +
> > > +	if (pg_sizes & H_RPTI_PAGE_64K) {
> > > +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> > > +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 16), psize,
> > > +				    start, end);
> > > +	}
> > > +
> > > +	if (pg_sizes & H_RPTI_PAGE_2M) {
> > > +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> > > +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 21), psize,
> > > +				    start, end);
> > > +	}
> > > +
> > > +	if (pg_sizes & H_RPTI_PAGE_1G) {
> > > +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> > > +		do_h_rpt_invalidate(pid, lpid, type, (1UL << 30), psize,
> > > +				    start, end);
> > > +	}
> > 
> > Hrm.  Here you're stepping through the hcall-defined page sizes, then
> > mapping each one to the Linux internal page size defs.
> > 
> > It might be more elegant to step through the mmu_psize_defs table, and
> > conditionally perform an invalidate for that page size if the
> > corresponding bit in pg_sizes is set (as noted earlier you could
> > easily add the H_RPTI_PAGE bit to the table).  That way it's a direct
> > table lookup rather than a bunch of ifs or switches.
> 
> Yes, let me give this a try.
> 
> > 
> > > +}
> > > +
> > > +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
> > > +				    unsigned long pid, unsigned long target,
> > > +				    unsigned long type, unsigned long pg_sizes,
> > > +				    unsigned long start, unsigned long end)
> > > +{
> > > +	if (!kvm_is_radix(vcpu->kvm))
> > > +		return H_UNSUPPORTED;
> > > +
> > > +	if (kvmhv_on_pseries())
> > > +		return H_UNSUPPORTED;
> > 
> > This doesn't seem quite right.  If you have multiply nested guests,
> > won't the L2 be issuing H_RPT_INVALIDATE hcalls to the L1 on behalf
> > of the L3?  The L1 would have to implement them by calling the L0, but
> > the L1 can't just reject them, no?
> > 
> > Likewise for the !H_RPTI_TYPE_NESTED case, but on what happens to be a
> > nested guest in any case, couldn't this case legitimately arise and
> > need to be handled?
> 
> The approach is to handle this hcall on behalf of all the nested
> guests in L0 only. I am intercepting the nested exit path precisely
> for this, as shown in the hunk below.

Ah, I see.  Might be worth commenting that, since it's not necessarily obvious.
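
One possible wording for such a comment, purely as an illustration:

	/*
	 * Process-scoped H_RPT_INVALIDATE hcalls issued by nested
	 * guests are intercepted and handled in L0 directly (see the
	 * BOOK3S_INTERRUPT_SYSCALL case in kvmppc_handle_nested_exit()),
	 * so an L1 hypervisor running on pseries never has to service
	 * them here.
	 */
	if (kvmhv_on_pseries())
		return H_UNSUPPORTED;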

> 
> > > @@ -1573,6 +1640,30 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
> > >  		if (!xics_on_xive())
> > >  			kvmppc_xics_rm_complete(vcpu, 0);
> > >  		break;
> > > +	case BOOK3S_INTERRUPT_SYSCALL:
> > > +	{
> > > +		unsigned long req = kvmppc_get_gpr(vcpu, 3);
> > > +
> > > +		if (req != H_RPT_INVALIDATE) {
> > > +			r = RESUME_HOST;
> > > +			break;
> > > +		}
> > > +
> > > +		/*
> > > +		 * The H_RPT_INVALIDATE hcalls issued by nested
> > > +		 * guest for process scoped invalidations when
> > > +		 * GTSE=0 are handled here.
> > > +		 */
> > > +		do_h_rpt_invalidate_prs(kvmppc_get_gpr(vcpu, 4),
> > > +					vcpu->arch.nested->shadow_lpid,
> > > +					kvmppc_get_gpr(vcpu, 5),
> > > +					kvmppc_get_gpr(vcpu, 6),
> > > +					kvmppc_get_gpr(vcpu, 7),
> > > +					kvmppc_get_gpr(vcpu, 8));
> > > +		kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
> > > +		r = RESUME_GUEST;
> > > +		break;
> > > +	}
> > >  	default:
> > >  		r = RESUME_HOST;
> > >  		break;
> 
> Thanks for your review.
> 
> Regards,
> Bharata.
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2021-02-23  3:08 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-15  6:35 [PATCH v4 0/3] Support for H_RPT_INVALIDATE in PowerPC KVM Bharata B Rao
2021-02-15  6:47 ` Bharata B Rao
2021-02-15  6:35 ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests Bharata B Rao
2021-02-15  6:47   ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from gu Bharata B Rao
2021-02-15 13:10   ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests kernel test robot
2021-02-15 13:10     ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations fro kernel test robot
2021-02-15 13:10     ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests kernel test robot
2021-02-17  0:24   ` David Gibson
2021-02-17  0:24     ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations fro David Gibson
2021-02-22  6:25     ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations from guests Bharata B Rao
2021-02-22  6:37       ` [PATCH v4 1/3] powerpc/book3s64/radix/tlb: tlbie primitives for process-scoped invalidations fro Bharata B Rao
2021-02-15  6:35 ` [PATCH v4 2/3] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE Bharata B Rao
2021-02-15  6:47   ` Bharata B Rao
2021-02-17  0:38   ` David Gibson
2021-02-17  0:38     ` David Gibson
2021-02-22  6:46     ` Bharata B Rao
2021-02-22  6:58       ` Bharata B Rao
2021-02-22 23:26       ` David Gibson
2021-02-22 23:26         ` David Gibson
2021-02-15  6:35 ` [PATCH v4 3/3] KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM Bharata B Rao
2021-02-15  6:47   ` Bharata B Rao
