All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <waiman.long@hp.com>
Cc: tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com,
	paolo.bonzini@gmail.com, konrad.wilk@oracle.com,
	boris.ostrovsky@oracle.com, paulmck@linux.vnet.ibm.com,
	riel@redhat.com, torvalds@linux-foundation.org,
	raghavendra.kt@linux.vnet.ibm.com, david.vrabel@citrix.com,
	oleg@redhat.com, scott.norton@hp.com, doug.hatch@hp.com,
	linux-arch@vger.kernel.org, x86@kernel.org,
	linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	xen-devel@lists.xenproject.org, kvm@vger.kernel.org,
	luto@amacapital.net
Subject: Re: [PATCH 8/9] qspinlock: Generic paravirt support
Date: Thu, 2 Apr 2015 21:48:34 +0200	[thread overview]
Message-ID: <20150402194834.GF32047@worktop.ger.corp.intel.com> (raw)
In-Reply-To: <20150402172057.GA27490@worktop.programming.kicks-ass.net>

On Thu, Apr 02, 2015 at 07:20:57PM +0200, Peter Zijlstra wrote:
> pv_wait_head():
> 
> 	pv_hash()
> 	/* MB as per cmpxchg */
> 	cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL);
> 
> VS
> 
> __pv_queue_spin_unlock():
> 
> 	if (xchg(&l->locked, 0) != _Q_SLOW_VAL)
> 		return;
> 
> 	/* MB as per xchg */
> 	pv_hash_find(lock);
> 
> 


Something like so.. compile tested only.

I took out the LFSR because that was likely over engineering from my
side :-)

--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -2,6 +2,8 @@
 #error "do not include this file"
 #endif
 
+#include <linux/hash.h>
+
 /*
  * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
  * of spinning them.
@@ -107,7 +109,84 @@ static void pv_kick_node(struct mcs_spin
 		pv_kick(pn->cpu);
 }
 
-static DEFINE_PER_CPU(struct qspinlock *, __pv_lock_wait);
+/*
+ * Hash table using open addressing with an linear probe sequence.
+ *
+ * Since we should not be holding locks from NMI context (very rare indeed) the
+ * max load factor is 0.75, which is around the point where open adressing
+ * breaks down.
+ *
+ * Instead of probing just the immediate bucket we probe all buckets in the
+ * same cacheline.
+ *
+ * http://en.wikipedia.org/wiki/Hash_table#Open_addressing
+ *
+ */
+
+struct pv_hash_bucket {
+	struct qspinlock *lock;
+	int cpu;
+};
+
+/*
+ * XXX dynamic allocate using nr_cpu_ids instead...
+ */
+#define PV_LOCK_HASH_BITS	(2 + NR_CPUS_BITS)
+
+#if PV_LOCK_HASH_BITS < 6
+#undef PV_LOCK_HASH_BITS
+#define PB_LOCK_HASH_BITS	6
+#endif
+
+#define PV_LOCK_HASH_SIZE	(1 << PV_LOCK_HASH_BITS)
+
+static struct pv_hash_bucket __pv_lock_hash[PV_LOCK_HASH_SIZE] ____cacheline_aligned;
+
+#define PV_HB_PER_LINE		(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
+
+static inline u32 hash_align(u32 hash)
+{
+	return hash & ~(PV_HB_PER_LINE - 1);
+}
+
+#define for_each_hash_bucket(hb, off, hash)					\
+	for (hash = hash_align(hash), off = 0, hb = &__pv_lock_hash[hash + off];\
+	    off < PV_LOCK_HASH_SIZE;						\
+	    off++, hb = &__pv_lock_hash[(hash + off) % PV_LOCK_HASH_SIZE])
+
+static struct pv_hash_bucket *pv_hash_insert(struct qspinlock *lock)
+{
+	u32 offset, hash = hash_ptr(lock, PV_LOCK_HASH_BITS);
+	struct pv_hash_bucket *hb;
+
+	for_each_hash_bucket(hb, offset, hash) {
+		if (!cmpxchg(&hb->lock, NULL, lock)) {
+			WRITE_ONCE(hb->cpu, smp_processor_id());
+			return hb;
+		}
+	}
+
+	/*
+	 * Hard assumes there is an empty bucket somewhere.
+	 */
+	BUG();
+}
+
+static struct pv_hash_bucket *pv_hash_find(struct qspinlock *lock)
+{
+	u32 offset, hash = hash_ptr(lock, PV_LOCK_HASH_BITS);
+	struct pv_hash_bucket *hb;
+
+	for_each_hash_bucket(hb, offset, hash) {
+		if (READ_ONCE(hb->lock) == lock)
+			return hb;
+	}
+
+	/*
+	 * Hard assumes we _WILL_ find a match.
+	 */
+	BUG();
+}
 
 /*
  * Wait for l->locked to become clear; halt the vcpu after a short spin.
@@ -116,7 +195,9 @@ static DEFINE_PER_CPU(struct qspinlock *
 static void pv_wait_head(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
+	struct pv_hash_bucket *hb = NULL;
 	int loop;
+	u8 o;
 
 	for (;;) {
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
@@ -126,29 +207,47 @@ static void pv_wait_head(struct qspinloc
 			cpu_relax();
 		}
 
-		this_cpu_write(__pv_lock_wait, lock);
-		/*
-		 * __pv_lock_wait must be set before setting _Q_SLOW_VAL
-		 *
-		 * [S] __pv_lock_wait = lock    [RmW] l = l->locked = 0
-		 *     MB                             MB
-		 * [S] l->locked = _Q_SLOW_VAL  [L]   __pv_lock_wait
-		 *
-		 * Matches the xchg() in pv_queue_spin_unlock().
-		 */
-		if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL))
-			goto done;
+		if (!hb) {
+			hb = pv_hash_insert(lock);
+			/*
+			 * hb  must be set before setting _Q_SLOW_VAL
+			 *
+			 * [S]   hb <- lock               [RmW] l = l->locked = 0
+			 *       MB                             MB
+			 * [RmW] l->locked ?= _Q_SLOW_VAL [L]   hb
+			 *
+			 * Matches the xchg() in pv_queue_spin_unlock().
+			 */
+			o = cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL);
+			if (!o) {
+				/*
+				 * The lock got unlocked before we could set
+				 * _Q_SLOW_VAL, we must unhash ourselves.
+				 */
+				WRITE_ONCE(hb->lock, NULL);
+				goto done;
+			}
+			BUG_ON(o != _Q_LOCKED_VAL);
+			/*
+			 * At this point _Q_SLOW_VAL is visible and the unlock
+			 * will do the lookup. The lookup hard relies on the
+			 * entry being visible -- which it should be. Unlock
+			 * will unhash for us.
+			 */
+		}
 
 		pv_wait(&l->locked, _Q_SLOW_VAL);
+		/*
+		 * We can get spurious wakeups from interrupts, cycle back.
+		 */
 	}
 done:
-	this_cpu_write(__pv_lock_wait, NULL);
-
 	/*
 	 * Lock is unlocked now; the caller will acquire it without waiting.
 	 * As with pv_wait_node() we rely on the caller to do a load-acquire
 	 * for us.
 	 */
+	return;
 }
 
 /*
@@ -158,20 +257,20 @@ static void pv_wait_head(struct qspinloc
 void __pv_queue_spin_unlock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
-	int cpu;
+	struct pv_hash_bucket *hb;
 
 	if (xchg(&l->locked, 0) != _Q_SLOW_VAL)
 		return;
 
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
-	 * however we can still use the pointer value to search in our cpu
-	 * array.
+	 * however we can still use the pointer value to search in our hash
+	 * table.
 	 *
-	 * XXX: get rid of this loop
+	 * Also, if we observe _Q_SLOW_VAL we _must_ now observe 'our' hash
+	 * bucket. See pv_wait_head().
 	 */
-	for_each_possible_cpu(cpu) {
-		if (per_cpu(__pv_lock_wait, cpu) == lock)
-			pv_kick(cpu);
-	}
+	hb = pv_hash_find(lock);
+	pv_kick(hb->cpu);
+	WRITE_ONCE(hb->lock, NULL); /* unhash */
 }

  parent reply	other threads:[~2015-04-02 19:48 UTC|newest]

Thread overview: 135+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-16 13:16 [PATCH 0/9] qspinlock stuff -v15 Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 1/9] qspinlock: A simple generic 4-byte queue spinlock Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 2/9] qspinlock, x86: Enable x86-64 to use " Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 3/9] qspinlock: Add pending bit Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 4/9] qspinlock: Extract out code snippets for the next patch Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 5/9] qspinlock: Optimize for smaller NR_CPUS Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 6/9] qspinlock: Use a simple write to grab the lock Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 7/9] qspinlock: Revert to test-and-set on hypervisors Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 8/9] qspinlock: Generic paravirt support Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-18 20:50   ` Waiman Long
2015-03-19 10:12     ` Peter Zijlstra
2015-03-19 10:12     ` Peter Zijlstra
2015-03-19 10:12     ` Peter Zijlstra
2015-03-19 12:25       ` Peter Zijlstra
2015-03-19 12:25         ` Peter Zijlstra
2015-03-19 13:43         ` Peter Zijlstra
2015-03-19 13:43         ` Peter Zijlstra
2015-03-19 13:43         ` Peter Zijlstra
2015-03-19 23:25         ` Waiman Long
2015-03-19 23:25         ` Waiman Long
2015-03-19 23:25         ` Waiman Long
2015-04-01 16:20         ` Waiman Long
2015-04-01 16:20         ` Waiman Long
2015-04-01 16:20           ` Waiman Long
2015-04-01 17:12           ` Peter Zijlstra
2015-04-01 17:12           ` Peter Zijlstra
2015-04-01 17:12           ` Peter Zijlstra
2015-04-01 17:42             ` Peter Zijlstra
2015-04-01 17:42               ` Peter Zijlstra
2015-04-01 18:17               ` Peter Zijlstra
2015-04-01 18:17                 ` Peter Zijlstra
2015-04-01 18:54                 ` Waiman Long
2015-04-01 18:54                   ` Waiman Long
2015-04-01 18:48                   ` Peter Zijlstra
2015-04-01 18:48                   ` Peter Zijlstra
2015-04-01 19:58                     ` Waiman Long
2015-04-01 21:03                       ` Peter Zijlstra
2015-04-01 21:03                       ` Peter Zijlstra
2015-04-01 21:03                         ` Peter Zijlstra
2015-04-02 16:28                         ` Waiman Long
2015-04-02 17:20                           ` Peter Zijlstra
2015-04-02 17:20                             ` Peter Zijlstra
2015-04-02 19:48                             ` Peter Zijlstra
2015-04-02 19:48                             ` Peter Zijlstra [this message]
2015-04-03  3:39                               ` Waiman Long
2015-04-03  3:39                                 ` Waiman Long
2015-04-03  3:39                               ` Waiman Long
2015-04-03 13:43                               ` Peter Zijlstra
2015-04-03 13:43                               ` Peter Zijlstra
2015-04-03 13:43                                 ` Peter Zijlstra
2015-04-02 19:48                             ` Peter Zijlstra
2015-04-02 17:20                           ` Peter Zijlstra
2015-04-02 16:28                         ` Waiman Long
2015-04-02 16:28                         ` Waiman Long
2015-04-01 19:58                     ` Waiman Long
2015-04-01 19:58                     ` Waiman Long
2015-04-01 18:48                   ` Peter Zijlstra
2015-04-01 18:54                 ` Waiman Long
2015-04-01 18:17               ` Peter Zijlstra
2015-04-01 17:42             ` Peter Zijlstra
2015-04-01 20:10             ` Waiman Long
2015-04-01 20:10             ` Waiman Long
2015-04-01 20:10             ` Waiman Long
2015-03-19 12:25       ` Peter Zijlstra
2015-03-18 20:50   ` Waiman Long
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 9/9] qspinlock, x86, kvm: Implement KVM support for paravirt qspinlock Peter Zijlstra
2015-03-16 13:16 ` [PATCH 9/9] qspinlock,x86,kvm: " Peter Zijlstra
2015-03-16 13:16   ` [PATCH 9/9] qspinlock, x86, kvm: " Peter Zijlstra
2015-03-19  2:45   ` Waiman Long
2015-03-19 10:01     ` Peter Zijlstra
2015-03-19 10:01     ` [PATCH 9/9] qspinlock,x86,kvm: " Peter Zijlstra
2015-03-19 10:01       ` Peter Zijlstra
2015-03-19 21:08       ` [PATCH 9/9] qspinlock, x86, kvm: " Waiman Long
2015-03-19 21:08       ` [PATCH 9/9] qspinlock,x86,kvm: " Waiman Long
2015-03-19 21:08         ` [PATCH 9/9] qspinlock, x86, kvm: " Waiman Long
2015-03-20  7:43         ` Raghavendra K T
2015-03-20  7:43         ` [PATCH 9/9] qspinlock,x86,kvm: " Raghavendra K T
2015-03-20  7:43           ` [PATCH 9/9] qspinlock, x86, kvm: " Raghavendra K T
2015-03-19  2:45   ` Waiman Long
2015-03-16 14:08 ` [PATCH 0/9] qspinlock stuff -v15 David Vrabel
2015-03-16 14:08 ` [Xen-devel] " David Vrabel
2015-03-16 14:08   ` David Vrabel
2015-03-16 14:08   ` David Vrabel
2015-03-16 14:08   ` David Vrabel
2015-03-18 20:36 ` Waiman Long
2015-03-18 20:36 ` Waiman Long
2015-03-18 20:36 ` Waiman Long
2015-03-19 18:01 ` [Xen-devel] " David Vrabel
2015-03-19 18:01 ` David Vrabel
2015-03-19 18:01   ` David Vrabel
2015-03-19 18:32   ` Peter Zijlstra
2015-03-19 18:32     ` Peter Zijlstra
2015-03-19 18:32   ` Peter Zijlstra
2015-03-19 18:01 ` David Vrabel
2015-03-25 19:47 ` Konrad Rzeszutek Wilk
2015-03-26 20:21   ` Peter Zijlstra
2015-03-26 20:21   ` Peter Zijlstra
2015-03-26 20:21     ` Peter Zijlstra
2015-03-27 14:07     ` Konrad Rzeszutek Wilk
2015-03-27 14:07     ` Konrad Rzeszutek Wilk
2015-03-27 14:07     ` Konrad Rzeszutek Wilk
2015-03-30 16:41       ` Waiman Long
2015-03-30 16:41       ` Waiman Long
2015-03-30 16:41       ` Waiman Long
2015-03-30 16:25   ` Waiman Long
2015-03-30 16:25   ` Waiman Long
2015-03-30 16:29     ` Peter Zijlstra
2015-03-30 16:29       ` Peter Zijlstra
2015-03-30 16:43       ` Waiman Long
2015-03-30 16:43       ` Waiman Long
2015-03-30 16:43         ` Waiman Long
2015-03-30 16:29     ` Peter Zijlstra
2015-03-30 16:25   ` Waiman Long
2015-03-25 19:47 ` Konrad Rzeszutek Wilk
2015-03-25 19:47 ` Konrad Rzeszutek Wilk
2015-03-27  6:40 ` Raghavendra K T
2015-03-27  6:40 ` Raghavendra K T
2015-03-27  6:40 ` Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150402194834.GF32047@worktop.ger.corp.intel.com \
    --to=peterz@infradead.org \
    --cc=boris.ostrovsky@oracle.com \
    --cc=david.vrabel@citrix.com \
    --cc=doug.hatch@hp.com \
    --cc=hpa@zytor.com \
    --cc=konrad.wilk@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=paolo.bonzini@gmail.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=raghavendra.kt@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=scott.norton@hp.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=waiman.long@hp.com \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.