All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <waiman.long@hp.com>
Cc: linux-arch@vger.kernel.org, riel@redhat.com, x86@kernel.org,
	kvm@vger.kernel.org, scott.norton@hp.com,
	raghavendra.kt@linux.vnet.ibm.com, paolo.bonzini@gmail.com,
	oleg@redhat.com, linux-kernel@vger.kernel.org, mingo@redhat.com,
	david.vrabel@citrix.com, hpa@zytor.com, luto@amacapital.net,
	xen-devel@lists.xenproject.org, tglx@linutronix.de,
	paulmck@linux.vnet.ibm.com, torvalds@linux-foundation.org,
	boris.ostrovsky@oracle.com,
	virtualization@lists.linux-foundation.org, doug.hatch@hp.com
Subject: Re: [PATCH 8/9] qspinlock: Generic paravirt support
Date: Thu, 19 Mar 2015 13:25:36 +0100	[thread overview]
Message-ID: <20150319122536.GD11574__4821.82866330749$1426768060$gmane$org@worktop.ger.corp.intel.com> (raw)
In-Reply-To: <20150319101242.GM21418@twins.programming.kicks-ass.net>

On Thu, Mar 19, 2015 at 11:12:42AM +0100, Peter Zijlstra wrote:
> So I was now thinking of hashing the lock pointer; let me go and quickly
> put something together.

A little something like so; ideally we'd allocate the hashtable since
NR_CPUS is kinda bloated, but it shows the idea I think.

And while this has loops in (the rehashing thing) their fwd progress
does not depend on other CPUs.

And I suspect that for the typical lock contention scenarios its
unlikely we ever really get into long rehashing chains.

---
 include/linux/lfsr.h                |   49 ++++++++++++
 kernel/locking/qspinlock_paravirt.h |  143 ++++++++++++++++++++++++++++++++----
 2 files changed, 178 insertions(+), 14 deletions(-)

--- /dev/null
+++ b/include/linux/lfsr.h
@@ -0,0 +1,49 @@
+#ifndef _LINUX_LFSR_H
+#define _LINUX_LFSR_H
+
+/*
+ * Simple Binary Galois Linear Feedback Shift Register
+ *
+ * http://en.wikipedia.org/wiki/Linear_feedback_shift_register
+ *
+ */
+
+extern void __lfsr_needs_more_taps(void);
+
+static __always_inline u32 lfsr_taps(int bits)
+{
+	if (bits ==  1) return 0x0001;
+	if (bits ==  2) return 0x0001;
+	if (bits ==  3) return 0x0003;
+	if (bits ==  4) return 0x0009;
+	if (bits ==  5) return 0x0012;
+	if (bits ==  6) return 0x0021;
+	if (bits ==  7) return 0x0041;
+	if (bits ==  8) return 0x008E;
+	if (bits ==  9) return 0x0108;
+	if (bits == 10) return 0x0204;
+	if (bits == 11) return 0x0402;
+	if (bits == 12) return 0x0829;
+	if (bits == 13) return 0x100D;
+	if (bits == 14) return 0x2015;
+
+	/*
+	 * For more taps see:
+	 *   http://users.ece.cmu.edu/~koopman/lfsr/index.html
+	 */
+	__lfsr_needs_more_taps();
+
+	return 0;
+}
+
+static inline u32 lfsr(u32 val, int bits)
+{
+	u32 bit = val & 1;
+
+	val >>= 1;
+	if (bit)
+		val ^= lfsr_taps(bits);
+	return val;
+}
+
+#endif /* _LINUX_LFSR_H */
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -2,6 +2,9 @@
 #error "do not include this file"
 #endif
 
+#include <linux/hash.h>
+#include <linux/lfsr.h>
+
 /*
  * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
  * of spinning them.
@@ -107,7 +110,120 @@ static void pv_kick_node(struct mcs_spin
 		pv_kick(pn->cpu);
 }
 
-static DEFINE_PER_CPU(struct qspinlock *, __pv_lock_wait);
+/*
+ * Hash table using open addressing with an LFSR probe sequence.
+ *
+ * Since we should not be holding locks from NMI context (very rare indeed) the
+ * max load factor is 0.75, which is around the point where open addressing
+ * breaks down.
+ *
+ * Instead of probing just the immediate bucket we probe all buckets in the
+ * same cacheline.
+ *
+ * http://en.wikipedia.org/wiki/Hash_table#Open_addressing
+ *
+ */
+
+#define HB_RESERVED	((struct qspinlock *)1)
+
+struct pv_hash_bucket {
+	struct qspinlock *lock;
+	int cpu;
+};
+
+/*
+ * XXX dynamic allocate using nr_cpu_ids instead...
+ */
+#define PV_LOCK_HASH_BITS	(2 + NR_CPUS_BITS)
+
+#if PV_LOCK_HASH_BITS < 6
+#undef PV_LOCK_HASH_BITS
+#define PB_LOCK_HASH_BITS	6
+#endif
+
+#define PV_LOCK_HASH_SIZE	(1 << PV_LOCK_HASH_BITS)
+
+static struct pv_hash_bucket __pv_lock_hash[PV_LOCK_HASH_SIZE] ____cacheline_aligned;
+
+#define PV_HB_PER_LINE		(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
+
+static inline u32 hash_align(u32 hash)
+{
+	return hash & ~(PV_HB_PER_LINE - 1);
+}
+
+static struct qspinlock **pv_hash(struct qspinlock *lock)
+{
+	u32 hash = hash_ptr(lock, PV_LOCK_HASH_BITS);
+	struct pv_hash_bucket *hb, *end;
+
+	if (!hash)
+		hash = 1;
+
+	hb = &__pv_lock_hash[hash_align(hash)];
+	for (;;) {
+		for (end = hb + PV_HB_PER_LINE; hb < end; hb++) {
+			if (cmpxchg(&hb->lock, NULL, HB_RESERVED)) {
+				WRITE_ONCE(hb->cpu, smp_processor_id());
+				/*
+				 * Since we must read lock first and cpu
+				 * second, we must write cpu first and lock
+				 * second, therefore use HB_RESERVE to mark an
+				 * entry in use before writing the values.
+				 *
+				 * This can cause hb_hash_find() to not find a
+				 * cpu even though _Q_SLOW_VAL, this is not a
+				 * problem since we re-check l->locked before
+				 * going to sleep and the unlock will have
+				 * cleared l->locked already.
+				 */
+				smp_wmb(); /* matches rmb from pv_hash_find */
+				WRITE_ONCE(hb->lock, lock);
+				goto done;
+			}
+		}
+
+		hash = lfsr(hash, PV_LOCK_HASH_BITS);
+		hb = &__pv_lock_hash[hash_align(hash)];
+	}
+
+done:
+	return &hb->lock;
+}
+
+static int pv_hash_find(struct qspinlock *lock)
+{
+	u64 hash = hash_ptr(lock, PV_LOCK_HASH_BITS);
+	struct pv_hash_bucket *hb, *end;
+	int cpu = -1;
+
+	if (!hash)
+		hash = 1;
+
+	hb = &__pv_lock_hash[hash_align(hash)];
+	for (;;) {
+		for (end = hb + PV_HB_PER_LINE; hb < end; hb++) {
+			struct qspinlock *l = READ_ONCE(hb->lock);
+
+			/*
+			 * If we hit an unused bucket, there is no match.
+			 */
+			if (!l)
+				goto done;
+
+			if (l == lock) {
+				smp_rmb(); /* matches wmb from pv_hash() */
+				cpu = READ_ONCE(hb->cpu);
+				goto done;
+			}
+		}
+
+		hash = lfsr(hash, PV_LOCK_HASH_BITS);
+		hb = &__pv_lock_hash[hash_align(hash)];
+	}
+done:
+	return cpu;
+}
 
 /*
  * Wait for l->locked to become clear; halt the vcpu after a short spin.
@@ -116,6 +232,7 @@ static DEFINE_PER_CPU(struct qspinlock *
 static void pv_wait_head(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
+	struct qspinlock **lp = NULL;
 	int loop;
 
 	for (;;) {
@@ -126,13 +243,13 @@ static void pv_wait_head(struct qspinloc
 			cpu_relax();
 		}
 
-		this_cpu_write(__pv_lock_wait, lock);
+		lp = pv_hash(lock);
 		/*
-		 * __pv_lock_wait must be set before setting _Q_SLOW_VAL
+		 * lp  must be set before setting _Q_SLOW_VAL
 		 *
-		 * [S] __pv_lock_wait = lock    [RmW] l = l->locked = 0
+		 * [S] lp = lock                [RmW] l = l->locked = 0
 		 *     MB                             MB
-		 * [S] l->locked = _Q_SLOW_VAL  [L]   __pv_lock_wait
+		 * [S] l->locked = _Q_SLOW_VAL  [L]   lp
 		 *
 		 * Matches the xchg() in pv_queue_spin_unlock().
 		 */
@@ -142,7 +259,8 @@ static void pv_wait_head(struct qspinloc
 		pv_wait(&l->locked, _Q_SLOW_VAL);
 	}
 done:
-	this_cpu_write(__pv_lock_wait, NULL);
+	if (lp)
+		WRITE_ONCE(*lp, NULL);
 
 	/*
 	 * Lock is unlocked now; the caller will acquire it without waiting.
@@ -165,13 +283,10 @@ void __pv_queue_spin_unlock(struct qspin
 
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
-	 * however we can still use the pointer value to search in our cpu
-	 * array.
-	 *
-	 * XXX: get rid of this loop
+	 * however we can still use the pointer value to search in our hash
+	 * table.
 	 */
-	for_each_possible_cpu(cpu) {
-		if (per_cpu(__pv_lock_wait, cpu) == lock)
-			pv_kick(cpu);
-	}
+	cpu = pv_hash_find(lock);
+	if (cpu >= 0)
+		pv_kick(cpu);
 }

  parent reply	other threads:[~2015-03-19 12:25 UTC|newest]

Thread overview: 135+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-16 13:16 [PATCH 0/9] qspinlock stuff -v15 Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 1/9] qspinlock: A simple generic 4-byte queue spinlock Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 2/9] qspinlock, x86: Enable x86-64 to use " Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 3/9] qspinlock: Add pending bit Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 4/9] qspinlock: Extract out code snippets for the next patch Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 5/9] qspinlock: Optimize for smaller NR_CPUS Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 6/9] qspinlock: Use a simple write to grab the lock Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 7/9] qspinlock: Revert to test-and-set on hypervisors Peter Zijlstra
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 8/9] qspinlock: Generic paravirt support Peter Zijlstra
2015-03-16 13:16   ` Peter Zijlstra
2015-03-18 20:50   ` Waiman Long
2015-03-19 10:12     ` Peter Zijlstra
2015-03-19 10:12     ` Peter Zijlstra
2015-03-19 10:12     ` Peter Zijlstra
2015-03-19 12:25       ` Peter Zijlstra
2015-03-19 12:25         ` Peter Zijlstra
2015-03-19 13:43         ` Peter Zijlstra
2015-03-19 13:43         ` Peter Zijlstra
2015-03-19 13:43         ` Peter Zijlstra
2015-03-19 23:25         ` Waiman Long
2015-03-19 23:25         ` Waiman Long
2015-03-19 23:25         ` Waiman Long
2015-04-01 16:20         ` Waiman Long
2015-04-01 16:20         ` Waiman Long
2015-04-01 16:20           ` Waiman Long
2015-04-01 17:12           ` Peter Zijlstra
2015-04-01 17:12           ` Peter Zijlstra
2015-04-01 17:12           ` Peter Zijlstra
2015-04-01 17:42             ` Peter Zijlstra
2015-04-01 17:42               ` Peter Zijlstra
2015-04-01 18:17               ` Peter Zijlstra
2015-04-01 18:17                 ` Peter Zijlstra
2015-04-01 18:54                 ` Waiman Long
2015-04-01 18:54                   ` Waiman Long
2015-04-01 18:48                   ` Peter Zijlstra
2015-04-01 18:48                   ` Peter Zijlstra
2015-04-01 19:58                     ` Waiman Long
2015-04-01 21:03                       ` Peter Zijlstra
2015-04-01 21:03                       ` Peter Zijlstra
2015-04-01 21:03                         ` Peter Zijlstra
2015-04-02 16:28                         ` Waiman Long
2015-04-02 17:20                           ` Peter Zijlstra
2015-04-02 17:20                             ` Peter Zijlstra
2015-04-02 19:48                             ` Peter Zijlstra
2015-04-02 19:48                             ` Peter Zijlstra
2015-04-03  3:39                               ` Waiman Long
2015-04-03  3:39                                 ` Waiman Long
2015-04-03  3:39                               ` Waiman Long
2015-04-03 13:43                               ` Peter Zijlstra
2015-04-03 13:43                               ` Peter Zijlstra
2015-04-03 13:43                                 ` Peter Zijlstra
2015-04-02 19:48                             ` Peter Zijlstra
2015-04-02 17:20                           ` Peter Zijlstra
2015-04-02 16:28                         ` Waiman Long
2015-04-02 16:28                         ` Waiman Long
2015-04-01 19:58                     ` Waiman Long
2015-04-01 19:58                     ` Waiman Long
2015-04-01 18:48                   ` Peter Zijlstra
2015-04-01 18:54                 ` Waiman Long
2015-04-01 18:17               ` Peter Zijlstra
2015-04-01 17:42             ` Peter Zijlstra
2015-04-01 20:10             ` Waiman Long
2015-04-01 20:10             ` Waiman Long
2015-04-01 20:10             ` Waiman Long
2015-03-19 12:25       ` Peter Zijlstra [this message]
2015-03-18 20:50   ` Waiman Long
2015-03-16 13:16 ` Peter Zijlstra
2015-03-16 13:16 ` [PATCH 9/9] qspinlock, x86, kvm: Implement KVM support for paravirt qspinlock Peter Zijlstra
2015-03-16 13:16 ` [PATCH 9/9] qspinlock,x86,kvm: " Peter Zijlstra
2015-03-16 13:16   ` [PATCH 9/9] qspinlock, x86, kvm: " Peter Zijlstra
2015-03-19  2:45   ` Waiman Long
2015-03-19 10:01     ` Peter Zijlstra
2015-03-19 10:01     ` [PATCH 9/9] qspinlock,x86,kvm: " Peter Zijlstra
2015-03-19 10:01       ` Peter Zijlstra
2015-03-19 21:08       ` [PATCH 9/9] qspinlock, x86, kvm: " Waiman Long
2015-03-19 21:08       ` [PATCH 9/9] qspinlock,x86,kvm: " Waiman Long
2015-03-19 21:08         ` [PATCH 9/9] qspinlock, x86, kvm: " Waiman Long
2015-03-20  7:43         ` Raghavendra K T
2015-03-20  7:43         ` [PATCH 9/9] qspinlock,x86,kvm: " Raghavendra K T
2015-03-20  7:43           ` [PATCH 9/9] qspinlock, x86, kvm: " Raghavendra K T
2015-03-19  2:45   ` Waiman Long
2015-03-16 14:08 ` [PATCH 0/9] qspinlock stuff -v15 David Vrabel
2015-03-16 14:08 ` [Xen-devel] " David Vrabel
2015-03-16 14:08   ` David Vrabel
2015-03-16 14:08   ` David Vrabel
2015-03-16 14:08   ` David Vrabel
2015-03-18 20:36 ` Waiman Long
2015-03-18 20:36 ` Waiman Long
2015-03-18 20:36 ` Waiman Long
2015-03-19 18:01 ` [Xen-devel] " David Vrabel
2015-03-19 18:01 ` David Vrabel
2015-03-19 18:01   ` David Vrabel
2015-03-19 18:32   ` Peter Zijlstra
2015-03-19 18:32     ` Peter Zijlstra
2015-03-19 18:32   ` Peter Zijlstra
2015-03-19 18:01 ` David Vrabel
2015-03-25 19:47 ` Konrad Rzeszutek Wilk
2015-03-26 20:21   ` Peter Zijlstra
2015-03-26 20:21   ` Peter Zijlstra
2015-03-26 20:21     ` Peter Zijlstra
2015-03-27 14:07     ` Konrad Rzeszutek Wilk
2015-03-27 14:07     ` Konrad Rzeszutek Wilk
2015-03-27 14:07     ` Konrad Rzeszutek Wilk
2015-03-30 16:41       ` Waiman Long
2015-03-30 16:41       ` Waiman Long
2015-03-30 16:41       ` Waiman Long
2015-03-30 16:25   ` Waiman Long
2015-03-30 16:25   ` Waiman Long
2015-03-30 16:29     ` Peter Zijlstra
2015-03-30 16:29       ` Peter Zijlstra
2015-03-30 16:43       ` Waiman Long
2015-03-30 16:43       ` Waiman Long
2015-03-30 16:43         ` Waiman Long
2015-03-30 16:29     ` Peter Zijlstra
2015-03-30 16:25   ` Waiman Long
2015-03-25 19:47 ` Konrad Rzeszutek Wilk
2015-03-25 19:47 ` Konrad Rzeszutek Wilk
2015-03-27  6:40 ` Raghavendra K T
2015-03-27  6:40 ` Raghavendra K T
2015-03-27  6:40 ` Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='20150319122536.GD11574__4821.82866330749$1426768060$gmane$org@worktop.ger.corp.intel.com' \
    --to=peterz@infradead.org \
    --cc=boris.ostrovsky@oracle.com \
    --cc=david.vrabel@citrix.com \
    --cc=doug.hatch@hp.com \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=paolo.bonzini@gmail.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=raghavendra.kt@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=scott.norton@hp.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=waiman.long@hp.com \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.