[PULL 17/18] KVM: PPC: Book3S HV: Improve H_CONFER implementation

From: Alexander Graf <agraf@suse.de>
To: kvm-ppc@vger.kernel.org
Cc: kvm@vger.kernel.org, pbonzini@redhat.com,
	Sam Bobroff <sam.bobroff@au1.ibm.com>,
	Paul Mackerras <paulus@samba.org>
Subject: [PULL 17/18] KVM: PPC: Book3S HV: Improve H_CONFER implementation
Date: Thu, 18 Dec 2014 01:47:00 +0100	[thread overview]
Message-ID: <1418863621-6630-18-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1418863621-6630-1-git-send-email-agraf@suse.de>

From: Sam Bobroff <sam.bobroff@au1.ibm.com>

Currently the H_CONFER hcall is implemented in kernel virtual mode,
meaning that whenever a guest thread does an H_CONFER, all the threads
in that virtual core have to exit the guest.  This is bad for
performance because it interrupts the other threads even if they
are doing useful work.

The H_CONFER hcall is called by a guest VCPU when it is spinning on a
spinlock and it detects that the spinlock is held by a guest VCPU that
is currently not running on a physical CPU.  The idea is to give this
VCPU's time slice to the holder VCPU so that it can make progress
towards releasing the lock.

To avoid having the other threads exit the guest unnecessarily,
we add a real-mode implementation of H_CONFER that checks whether
the other threads are doing anything.  If all the other threads
are idle (i.e. in H_CEDE) or trying to confer (i.e. in H_CONFER),
it returns H_TOO_HARD which causes a guest exit and allows the
H_CONFER to be handled in virtual mode.

Otherwise it spins for a short time (up to 10 microseconds) to give
other threads the chance to observe that this thread is trying to
confer.  The spin loop also terminates when any thread exits the guest
or when all other threads are idle or trying to confer.  If the
timeout is reached, the H_CONFER returns H_SUCCESS.  In this case the
guest VCPU will recheck the spinlock word and most likely call
H_CONFER again.

This also improves the implementation of the H_CONFER virtual mode
handler.  If the VCPU is part of a virtual core (vcore) which is
runnable, there will be a 'runner' VCPU which has taken responsibility
for running the vcore.  In this case we yield to the runner VCPU
rather than the target VCPU.

We also introduce a check on the target VCPU's yield count: if it
differs from the yield count passed to H_CONFER, the target VCPU
has run since H_CONFER was called and may have already released
the lock.  This check is required by PAPR.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h     |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 41 ++++++++++++++++++++++++++++++++-
 arch/powerpc/kvm/book3s_hv_builtin.c    | 32 +++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  2 +-
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6544187..7efd666a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -295,6 +295,7 @@ struct kvmppc_vcore {
 	ulong dpdes;		/* doorbell state (POWER8) */
 	void *mpp_buffer; /* Micro Partition Prefetch buffer */
 	bool mpp_buffer_is_valid;
+	ulong conferring_threads;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 299351e..de4018a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 	}
 }
 
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+	struct kvmppc_vcore *vcore = target->arch.vcore;
+
+	/*
+	 * We expect to have been called by the real mode handler
+	 * (kvmppc_rm_h_confer()) which would have directly returned
+	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+	 * have useful work to do and should not confer) so we don't
+	 * recheck that here.
+	 */
+
+	spin_lock(&vcore->lock);
+	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	    vcore->vcore_state != VCORE_INACTIVE)
+		target = vcore->runner;
+	spin_unlock(&vcore->lock);
+
+	return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+	int yield_count = 0;
+	struct lppaca *lppaca;
+
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+	if (lppaca)
+		yield_count = lppaca->yield_count;
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return yield_count;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
+	int yield_count;
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
@@ -646,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 			ret = H_PARAMETER;
 			break;
 		}
-		kvm_vcpu_yield_to(tvcpu);
+		yield_count = kvmppc_get_gpr(vcpu, 5);
+		if (kvmppc_get_yield_count(tvcpu) != yield_count)
+			break;
+		kvm_arch_vcpu_yield_to(tvcpu);
 		break;
 	case H_REGISTER_VPA:
 		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -1697,6 +1735,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
 
 	/*
 	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 1786bf8..3e43f81 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -17,6 +17,7 @@
 #include <linux/memblock.h>
 #include <linux/sizes.h>
 #include <linux/cma.h>
+#include <linux/bitops.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
@@ -97,6 +98,37 @@ void __init kvm_cma_reserve(void)
 }
 
 /*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded.  If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+			    unsigned int yield_count)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	int threads_running;
+	int threads_ceded;
+	int threads_conferring;
+	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+	int rv = H_SUCCESS; /* => don't yield */
+
+	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+		threads_running = VCORE_ENTRY_COUNT(vc);
+		threads_ceded = hweight32(vc->napping_threads);
+		threads_conferring = hweight32(vc->conferring_threads);
+		if (threads_ceded + threads_conferring >= threads_running) {
+			rv = H_TOO_HARD; /* => do yield */
+			break;
+		}
+	}
+	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	return rv;
+}
+
+/*
  * When running HV mode KVM we need to block certain operations while KVM VMs
  * exist in the system. We use a counter of VMs to track this.
  *
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 26a5b8d..0a2d64f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1798,7 +1798,7 @@ hcall_real_table:
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
 	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
-	.long	0		/* 0xe4 */
+	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
 	.long	0		/* 0xf0 */
-- 
1.8.1.4