From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753458AbbGVUOi (ORCPT <rfc822;w@1wt.eu>);
	Wed, 22 Jul 2015 16:14:38 -0400
Received: from g9t5008.houston.hp.com ([15.240.92.66]:51424 "EHLO
	g9t5008.houston.hp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1753294AbbGVUNX (ORCPT
	<rfc822;linux-kernel@vger.kernel.org>);
	Wed, 22 Jul 2015 16:13:23 -0400
From: Waiman Long <Waiman.Long@hp.com>
To: Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@redhat.com>,
        Thomas Gleixner <tglx@linutronix.de>, "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
        Scott J Norton <scott.norton@hp.com>,
        Douglas Hatch <doug.hatch@hp.com>, Davidlohr Bueso <dave@stgolabs.net>,
        Waiman Long <Waiman.Long@hp.com>
Subject: [PATCH v3 4/7] locking/pvqspinlock: Enable deferment of vCPU kicking to unlock call
Date: Wed, 22 Jul 2015 16:12:39 -0400
Message-Id: <1437595962-21472-5-git-send-email-Waiman.Long@hp.com>
X-Mailer: git-send-email 1.7.1
In-Reply-To: <1437595962-21472-1-git-send-email-Waiman.Long@hp.com>
References: <1437595962-21472-1-git-send-email-Waiman.Long@hp.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

Most of the vCPU kicking is done on the locking side, where the new
lock holder wakes up the queue head vCPU to spin on the lock. However,
there are situations where it may be advantageous to defer the vCPU
kicking to when the lock holder releases the lock.

This patch enables the deferment of vCPU kicking to the unlock function
by adding a new vCPU state (vcpu_hashed) which marks the fact that
 1) _Q_SLOW_VAL is set in the lock, and
 2) the pv_node address is stored in the hash table

This enablement patch, by itself, should not change the performance
of the pvqspinlock code. The actual deferment of vCPU kicks will be
added in a later patch.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 kernel/locking/qspinlock.c          |    6 +++---
 kernel/locking/qspinlock_paravirt.h |   34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 6518ee9..94fdd27 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -259,8 +259,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
@@ -464,7 +464,7 @@ queue:
 		cpu_relax();
 
 	arch_mcs_spin_unlock_contended(&next->locked);
-	pv_kick_node(next);
+	pv_kick_node(lock, next);
 
 release:
 	/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index db8d08b..a4986c6 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -33,6 +33,7 @@
 enum vcpu_state {
 	vcpu_running = 0,
 	vcpu_halted,
+	vcpu_hashed,	/* vcpu_halted + node stored in hash table */
 };
 
 struct pv_node {
@@ -404,13 +405,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 		}
 
+		if (READ_ONCE(node->locked))
+			break;
+
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * Reset the vCPU state to running to avoid unncessary CPU
+		 * kicking unless vcpu_hashed had already been set. In this
+		 * case, node->locked should have just been set, and we
+		 * aren't going to set state to vcpu_halted again.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
-		if (READ_ONCE(node->locked))
-			break;
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
 		 * spurious wakeup and the vCPU should wait again. However,
@@ -429,12 +434,16 @@ static void pv_wait_node(struct mcs_spinlock *node)
 
 /*
  * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * pv_wait_node(). Alternatively, it can also defer the kicking to the
+ * unlock function.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
 
+	if (xchg(&pn->state, vcpu_running) != vcpu_halted)
+		return;
+
 	/*
 	 * Note that because node->locked is already set, this actual
 	 * mcs_spinlock entry could be re-used already.
@@ -444,10 +453,8 @@ static void pv_kick_node(struct mcs_spinlock *node)
 	 *
 	 * See the comment in pv_wait_node().
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted) {
-		pvstat_inc(pvstat_lock_kick);
-		pv_kick(pn->cpu);
-	}
+	pvstat_inc(pvstat_lock_kick);
+	pv_kick(pn->cpu);
 }
 
 /*
@@ -469,6 +476,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
+		if (!lp && (xchg(&pn->state, vcpu_hashed) == vcpu_hashed))
+			/*
+			 * The hashed table & _Q_SLOW_VAL had been filled
+			 * by the lock holder.
+			 */
+			lp = (struct qspinlock **)-1;
+
 		if (!lp) { /* ONCE */
 			lp = pv_hash(lock, pn);
 			/*
-- 
1.7.1