* [PATCH v3 1/6] qspinlock: powerpc support qspinlock
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
@ 2016-05-25 8:18 ` Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 2/6] powerpc: pseries/Kconfig: Add qspinlock build config Pan Xinhui
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Pan Xinhui @ 2016-05-25 8:18 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, jeremy, chrisw,
akataria, rusty, Pan Xinhui
Base code to enable qspinlock on powerpc. This patch adds some #ifdefs
here and there. Although there is no paravirt-related code yet, we can
successfully build a qspinlock kernel after applying this patch.
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/qspinlock.h | 22 ++++++++++++++++++++++
arch/powerpc/include/asm/spinlock.h | 27 +++++++++++++++------------
arch/powerpc/include/asm/spinlock_types.h | 4 ++++
arch/powerpc/lib/locks.c | 4 ++++
4 files changed, 45 insertions(+), 12 deletions(-)
create mode 100644 arch/powerpc/include/asm/qspinlock.h
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 0000000..5883954
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define SPIN_THRESHOLD (1 << 15)
+#define queued_spin_unlock queued_spin_unlock
+
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+ /* no load/store can be across the unlock()*/
+ smp_store_release((u8 *)lock, 0);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 523673d..4359ee6 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -52,6 +52,20 @@
#define SYNC_IO
#endif
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
+#endif
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
@@ -106,18 +120,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
* held. Conveniently, we have a word in the paca that holds this
* value.
*/
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x) barrier()
-#define __rw_yield(x) barrier()
-#define SHARED_PROCESSOR 0
-#endif
-
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
CLEAR_IO_SYNC;
@@ -169,6 +171,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc..bd7144e 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,15 @@
# error "please don't include this file directly"
#endif
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#endif
typedef struct {
volatile signed int lock;
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index f7deebd..a9ebd71 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
#include <asm/hvcall.h>
#include <asm/smp.h>
+#ifndef CONFIG_QUEUED_SPINLOCKS
void __spin_yield(arch_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -42,6 +43,7 @@ void __spin_yield(arch_spinlock_t *lock)
get_hard_smp_processor_id(holder_cpu), yield_count);
}
EXPORT_SYMBOL_GPL(__spin_yield);
+#endif
/*
* Waiting for a read lock or a write lock on a rwlock...
@@ -69,6 +71,7 @@ void __rw_yield(arch_rwlock_t *rw)
}
#endif
+#ifndef CONFIG_QUEUED_SPINLOCKS
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
smp_mb();
@@ -84,3 +87,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v3 2/6] powerpc: pseries/Kconfig: Add qspinlock build config
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 1/6] qspinlock: powerpc support qspinlock Pan Xinhui
@ 2016-05-25 8:18 ` Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function Pan Xinhui
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Pan Xinhui @ 2016-05-25 8:18 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, jeremy, chrisw,
akataria, rusty, Pan Xinhui
pseries will use qspinlock by default.
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index bec90fb..f669323 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -21,6 +21,7 @@ config PPC_PSERIES
select HOTPLUG_CPU if SMP
select ARCH_RANDOM
select PPC_DOORBELL
+ select ARCH_USE_QUEUED_SPINLOCKS
default y
config PPC_SPLPAR
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v3 3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 1/6] qspinlock: powerpc support qspinlock Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 2/6] powerpc: pseries/Kconfig: Add qspinlock build config Pan Xinhui
@ 2016-05-25 8:18 ` Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 4/6] pv-qspinlock: powerpc support pv-qspinlock Pan Xinhui
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Pan Xinhui @ 2016-05-25 8:18 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, jeremy, chrisw,
akataria, rusty, Pan Xinhui
The pv-qspinlock core has pv_wait/pv_kick, which give better
performance by yielding and kicking a cpu in some cases.
Let's support them by adding two corresponding helper functions.
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/spinlock.h | 4 ++++
arch/powerpc/lib/locks.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4359ee6..3b65372 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -56,9 +56,13 @@
/* We only yield to the hypervisor if we are in shared processor mode */
#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
extern void __spin_yield(arch_spinlock_t *lock);
+extern void __spin_yield_cpu(int cpu);
+extern void __spin_wake_cpu(int cpu);
extern void __rw_yield(arch_rwlock_t *lock);
#else /* SPLPAR */
#define __spin_yield(x) barrier()
+#define __spin_yield_cpu(x) barrier()
+#define __spin_wake_cpu(x) barrier()
#define __rw_yield(x) barrier()
#define SHARED_PROCESSOR 0
#endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index a9ebd71..3a58834 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,39 @@
#include <asm/hvcall.h>
#include <asm/smp.h>
+void __spin_yield_cpu(int cpu)
+{
+ unsigned int holder_cpu = cpu, yield_count;
+
+ if (cpu == -1) {
+ plpar_hcall_norets(H_CONFER, -1, 0);
+ return;
+ }
+ BUG_ON(holder_cpu >= nr_cpu_ids);
+ yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+ if ((yield_count & 1) == 0) {
+ /* if target cpu is running, confer slices to lpar*/
+ plpar_hcall_norets(H_CONFER, -1, 0);
+ return;
+ }
+ plpar_hcall_norets(H_CONFER,
+ get_hard_smp_processor_id(holder_cpu), yield_count);
+}
+EXPORT_SYMBOL_GPL(__spin_yield_cpu);
+
+void __spin_wake_cpu(int cpu)
+{
+ unsigned int holder_cpu = cpu, yield_count;
+
+ BUG_ON(holder_cpu >= nr_cpu_ids);
+ yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ plpar_hcall_norets(H_PROD,
+ get_hard_smp_processor_id(holder_cpu));
+}
+EXPORT_SYMBOL_GPL(__spin_wake_cpu);
+
#ifndef CONFIG_QUEUED_SPINLOCKS
void __spin_yield(arch_spinlock_t *lock)
{
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v3 4/6] pv-qspinlock: powerpc support pv-qspinlock
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
` (2 preceding siblings ...)
2016-05-25 8:18 ` [PATCH v3 3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function Pan Xinhui
@ 2016-05-25 8:18 ` Pan Xinhui
2016-05-25 8:18 ` [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock Pan Xinhui
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Pan Xinhui @ 2016-05-25 8:18 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, jeremy, chrisw,
akataria, rusty, Pan Xinhui
As we need to let a pv-qspinlock kernel run in all environments, including
those which might have no PowerVM, we should choose at runtime which
qspinlock version to use.
The default pv-qspinlock uses the native version. pv_lock initialization
should be done at boot stage with irqs disabled. And if there is PHYP,
switch the pv_lock_ops callbacks to the pv versions.
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/qspinlock.h | 17 ++++++++
arch/powerpc/include/asm/qspinlock_paravirt.h | 38 ++++++++++++++++++
.../powerpc/include/asm/qspinlock_paravirt_types.h | 13 +++++++
arch/powerpc/kernel/paravirt.c | 45 ++++++++++++++++++++++
arch/powerpc/platforms/pseries/setup.c | 5 +++
5 files changed, 118 insertions(+)
create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h
create mode 100644 arch/powerpc/kernel/paravirt.c
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 5883954..4728f6e 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -12,10 +12,27 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock)
smp_store_release((u8 *)lock, 0);
}
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+#define __ARCH_NEED_PV_HASH_LOOKUP
+
+#include <asm/qspinlock_paravirt.h>
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+ pv_queued_spin_lock(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ pv_queued_spin_unlock(lock);
+}
+#else
static inline void queued_spin_unlock(struct qspinlock *lock)
{
native_queued_spin_unlock(lock);
}
+#endif
#include <asm-generic/qspinlock.h>
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h
new file mode 100644
index 0000000..cd17a79
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,38 @@
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#error "do not include this file"
+#endif
+
+#ifndef _ASM_QSPINLOCK_PARAVIRT_H
+#define _ASM_QSPINLOCK_PARAVIRT_H
+
+#include <asm/qspinlock_paravirt_types.h>
+
+extern void pv_lock_init(void);
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val)
+{
+ CLEAR_IO_SYNC;
+ pv_lock_op.lock(lock, val);
+}
+
+static inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ SYNC_IO;
+ pv_lock_op.unlock(lock);
+}
+
+static inline void pv_wait(u8 *ptr, u8 val, int lockcpu)
+{
+ pv_lock_op.wait(ptr, val, lockcpu);
+}
+
+static inline void pv_kick(int cpu)
+{
+ pv_lock_op.kick(cpu);
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt_types.h b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
new file mode 100644
index 0000000..e1fdeb0
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+#define _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+
+struct pv_lock_ops {
+ void (*lock)(struct qspinlock *lock, u32 val);
+ void (*unlock)(struct qspinlock *lock);
+ void (*wait)(u8 *ptr, u8 val, int cpu);
+ void (*kick)(int cpu);
+};
+
+extern struct pv_lock_ops pv_lock_op;
+
+#endif
diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c
new file mode 100644
index 0000000..4f19f7e
--- /dev/null
+++ b/arch/powerpc/kernel/paravirt.c
@@ -0,0 +1,45 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+
+static void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+
+static void __pv_wait(u8 *ptr, u8 val, int cpu)
+{
+ HMT_low();
+ if (READ_ONCE(*ptr) == val)
+ __spin_yield_cpu(cpu);
+ HMT_medium();
+}
+
+static void __pv_kick(int cpu)
+{
+ __spin_wake_cpu(cpu);
+}
+
+struct pv_lock_ops pv_lock_op = {
+ .lock = native_queued_spin_lock_slowpath,
+ .unlock = __native_queued_spin_unlock,
+ .wait = NULL,
+ .kick = NULL,
+};
+EXPORT_SYMBOL(pv_lock_op);
+
+void __init pv_lock_init(void)
+{
+ if (SHARED_PROCESSOR) {
+ __pv_init_lock_hash();
+ pv_lock_op.lock = __pv_queued_spin_lock_slowpath;
+ pv_lock_op.unlock = __pv_queued_spin_unlock;
+ pv_lock_op.wait = __pv_wait;
+ pv_lock_op.kick = __pv_kick;
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6e944fc..c9f056e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -547,6 +547,11 @@ static void __init pSeries_setup_arch(void)
"%ld\n", rc);
}
}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+ pv_lock_init();
+#endif
+
}
static int __init pSeries_init_panel(void)
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
` (3 preceding siblings ...)
2016-05-25 8:18 ` [PATCH v3 4/6] pv-qspinlock: powerpc support pv-qspinlock Pan Xinhui
@ 2016-05-25 8:18 ` Pan Xinhui
2016-05-26 16:47 ` Peter Zijlstra
2016-05-25 8:18 ` [PATCH v3 6/6] powerpc: pseries: Add pv-qspinlock build config/make Pan Xinhui
` (2 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: Pan Xinhui @ 2016-05-25 8:18 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, jeremy, chrisw,
akataria, rusty, Pan Xinhui
cmpxchg_release is lighter-weight than cmpxchg, so we can gain better
performance. On some archs like ppc, barriers impact performance
too much.
Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
kernel/locking/qspinlock_paravirt.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index a5b1248..2bbffe4 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
if (likely(locked == _Q_LOCKED_VAL))
return;
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
2016-05-25 8:18 ` [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock Pan Xinhui
@ 2016-05-26 16:47 ` Peter Zijlstra
2016-05-26 16:57 ` Peter Zijlstra
0 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2016-05-26 16:47 UTC (permalink / raw)
To: Pan Xinhui
Cc: linux-kernel, linuxppc-dev, virtualization, benh, paulus, mpe,
mingo, paulmck, jeremy, chrisw, akataria, rusty
On Wed, May 25, 2016 at 04:18:08PM +0800, Pan Xinhui wrote:
> cmpxchg_release is light-wight than cmpxchg, we can gain a better
> performace then. On some arch like ppc, barrier impact the performace
> too much.
>
> Suggested-by: Boqun Feng <boqun.feng@gmail.com>
> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
> ---
> kernel/locking/qspinlock_paravirt.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index a5b1248..2bbffe4 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
> * unhash. Otherwise it would be possible to have multiple @lock
> * entries, which would be BAD.
> */
> - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
> + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
> if (likely(locked == _Q_LOCKED_VAL))
> return;
This patch fails to explain _why_ it can be relaxed.
And seeing how this cmpxchg() can actually unlock the lock, I don't see
how this can possibly be correct. Maybe cmpxchg_release(), but relaxed
seems very wrong.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
2016-05-26 16:47 ` Peter Zijlstra
@ 2016-05-26 16:57 ` Peter Zijlstra
2016-05-27 10:34 ` xinhui
0 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2016-05-26 16:57 UTC (permalink / raw)
To: Pan Xinhui
Cc: linux-kernel, linuxppc-dev, virtualization, benh, paulus, mpe,
mingo, paulmck, jeremy, chrisw, akataria, rusty
On Thu, May 26, 2016 at 06:47:29PM +0200, Peter Zijlstra wrote:
> On Wed, May 25, 2016 at 04:18:08PM +0800, Pan Xinhui wrote:
> > cmpxchg_release is light-wight than cmpxchg, we can gain a better
> > performace then. On some arch like ppc, barrier impact the performace
> > too much.
> >
> > Suggested-by: Boqun Feng <boqun.feng@gmail.com>
> > Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
> > ---
> > kernel/locking/qspinlock_paravirt.h | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> > index a5b1248..2bbffe4 100644
> > --- a/kernel/locking/qspinlock_paravirt.h
> > +++ b/kernel/locking/qspinlock_paravirt.h
> > @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
> > * unhash. Otherwise it would be possible to have multiple @lock
> > * entries, which would be BAD.
> > */
> > - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
> > + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
> > if (likely(locked == _Q_LOCKED_VAL))
> > return;
>
> This patch fails to explain _why_ it can be relaxed.
>
> And seeing how this cmpxchg() can actually unlock the lock, I don't see
> how this can possibly be correct. Maybe cmpxchg_release(), but relaxed
> seems very wrong.
Clearly I need to stop working for the day, I cannea read. You're doing
release, not relaxed.
Still Changelog needs improvement.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
2016-05-26 16:57 ` Peter Zijlstra
@ 2016-05-27 10:34 ` xinhui
0 siblings, 0 replies; 13+ messages in thread
From: xinhui @ 2016-05-27 10:34 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, linuxppc-dev, virtualization, benh, paulus, mpe,
mingo, paulmck, jeremy, chrisw, akataria, rusty
On 2016年05月27日 00:57, Peter Zijlstra wrote:
> On Thu, May 26, 2016 at 06:47:29PM +0200, Peter Zijlstra wrote:
>> On Wed, May 25, 2016 at 04:18:08PM +0800, Pan Xinhui wrote:
>>> cmpxchg_release is light-wight than cmpxchg, we can gain a better
>>> performace then. On some arch like ppc, barrier impact the performace
>>> too much.
>>>
>>> Suggested-by: Boqun Feng <boqun.feng@gmail.com>
>>> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>>> ---
>>> kernel/locking/qspinlock_paravirt.h | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
>>> index a5b1248..2bbffe4 100644
>>> --- a/kernel/locking/qspinlock_paravirt.h
>>> +++ b/kernel/locking/qspinlock_paravirt.h
>>> @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
>>> * unhash. Otherwise it would be possible to have multiple @lock
>>> * entries, which would be BAD.
>>> */
>>> - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
>>> + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
>>> if (likely(locked == _Q_LOCKED_VAL))
>>> return;
>>
>> This patch fails to explain _why_ it can be relaxed.
>>
>> And seeing how this cmpxchg() can actually unlock the lock, I don't see
>> how this can possibly be correct. Maybe cmpxchg_release(), but relaxed
>> seems very wrong.
>
> Clearly I need to stop working for the day, I cannea read. You're doing
> release, not relaxed.
>
Never mind. thanks for review :)
> Still Changelog needs improvement.
>
Will do that.
thanks
xinhui
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v3 6/6] powerpc: pseries: Add pv-qspinlock build config/make
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
` (4 preceding siblings ...)
2016-05-25 8:18 ` [PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock Pan Xinhui
@ 2016-05-25 8:18 ` Pan Xinhui
2016-05-26 16:50 ` [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Peter Zijlstra
2016-05-26 16:54 ` Peter Zijlstra
7 siblings, 0 replies; 13+ messages in thread
From: Pan Xinhui @ 2016-05-25 8:18 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, jeremy, chrisw,
akataria, rusty, Pan Xinhui
pseries has PowerVM support, the default option is Y.
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/powerpc/kernel/Makefile | 1 +
arch/powerpc/platforms/pseries/Kconfig | 8 ++++++++
2 files changed, 9 insertions(+)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..ae7c2f1 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
+obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt.o
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f669323..46632e4 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -128,3 +128,11 @@ config HV_PERF_CTRS
systems. 24x7 is available on Power 8 systems.
If unsure, select Y.
+
+config PARAVIRT_SPINLOCKS
+ bool "Paravirtialization support for qspinlock"
+ depends on PPC_SPLPAR && QUEUED_SPINLOCKS
+ default y
+ help
+ If platform supports virtualization, for example PowerVM, this option
+ can let guest have a better performace.
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
` (5 preceding siblings ...)
2016-05-25 8:18 ` [PATCH v3 6/6] powerpc: pseries: Add pv-qspinlock build config/make Pan Xinhui
@ 2016-05-26 16:50 ` Peter Zijlstra
2016-05-27 10:52 ` xinhui
2016-05-26 16:54 ` Peter Zijlstra
7 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2016-05-26 16:50 UTC (permalink / raw)
To: Pan Xinhui
Cc: linux-kernel, linuxppc-dev, virtualization, benh, paulus, mpe,
mingo, paulmck, jeremy, chrisw, akataria, rusty
On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:
> _____test________________spinlcok______________pv-qspinlcok_____
> |futex hash | 556370 ops | 629634 ops |
> |futex lock-pi | 362 ops | 367 ops |
>
> scheduler test:
> Test how many loops of schedule() can finish within 10 seconds on all cpus.
>
> _____test________________spinlcok______________pv-qspinlcok_____
> |schedule() loops| 322811921 | 311449290 |
>
> kernel compiling test:
> build a linux kernel image to see how long it took
>
> _____test________________spinlcok______________pv-qspinlcok_____
> | compiling takes| 22m | 22m |
s/spinlcok/spinlock/
Is 'spinlcok' the current test-and-set lock?
And what about regular qspinlock, in case of !SHARED_PROCESSOR?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
2016-05-26 16:50 ` [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Peter Zijlstra
@ 2016-05-27 10:52 ` xinhui
0 siblings, 0 replies; 13+ messages in thread
From: xinhui @ 2016-05-27 10:52 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, linuxppc-dev, virtualization, benh, paulus, mpe,
mingo, paulmck, jeremy, chrisw, akataria, rusty
On 2016年05月27日 00:50, Peter Zijlstra wrote:
> On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:
>
>> _____test________________spinlcok______________pv-qspinlcok_____
>> |futex hash | 556370 ops | 629634 ops |
>> |futex lock-pi | 362 ops | 367 ops |
>>
>> scheduler test:
>> Test how many loops of schedule() can finish within 10 seconds on all cpus.
>>
>> _____test________________spinlcok______________pv-qspinlcok_____
>> |schedule() loops| 322811921 | 311449290 |
>>
>> kernel compiling test:
>> build a linux kernel image to see how long it took
>>
>> _____test________________spinlcok______________pv-qspinlcok_____
>> | compiling takes| 22m | 22m |
>
>
> s/spinlcok/spinlock/
>
Oh, foolish mistake...sorry
> Is 'spinlcok' the current test-and-set lock?
>
Yes. I will describe it in a clear way in the next patchset.
> And what about regular qspinlock, in case of !SHARED_PROCESSOR?
>
You mean the test results on powerNV?
Yes, I made a kernel build with !SHARED_PROCESSOR
and ran the perf tests and scheduler tests on the same machine (32 cpus). Performance is better than the current spinlock.
_____test________________spinlock________________qspinlock_____
|futex hash | 533060 ops | 541513 ops |
|futex lock-pi | 357 ops | 356 ops |
_____test________________spinlock________________qspinlock_____
|schedule() loops| 337691713 | 361935207 |
NOTE: I have updated the scheduler test tools, and the new performance test results show that both pv-spinlock and qspinlock are better than the current spinlock.
I will also update the test results in my next patchset.
thanks
xinhui
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
2016-05-25 8:18 [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Pan Xinhui
` (6 preceding siblings ...)
2016-05-26 16:50 ` [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention Peter Zijlstra
@ 2016-05-26 16:54 ` Peter Zijlstra
7 siblings, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2016-05-26 16:54 UTC (permalink / raw)
To: Pan Xinhui
Cc: linux-kernel, linuxppc-dev, virtualization, benh, paulus, mpe,
mingo, paulmck, jeremy, chrisw, akataria, rusty
On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:
> I do several tests on pseries IBM,8408-E8E with 32cpus, 64GB memory.
> benchmark test results are below.
> _____test________________spinlcok______________pv-qspinlcok_____
> | compiling takes| 22m | 22m |
How can a kernel build take 22 minutes with 32 CPUs? That's far too
long. My 40 CPU IVB-EP takes all of 50 seconds to build an
x86_64-defconfig from scratch.
^ permalink raw reply [flat|nested] 13+ messages in thread