linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] changes to enable lockless lockref on s390
@ 2013-09-28 10:58 Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 1/3] mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef Heiko Carstens
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Heiko Carstens @ 2013-09-28 10:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Tony Luck, Waiman Long, Martin Schwidefsky, Ingo Molnar,
	linux-kernel, Heiko Carstens

Hi Linus,

I just updated the patch set to include your suggested changes.

You can also pull the three patches from

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git lockref

Original part of description that still matters:

enabling the new lockless lockref variant on s390 would have been trivial
until Tony Luck added a cpu_relax() call into the CMPXCHG_LOOP(), with
d472d9d9 "lockref: Relax in cmpxchg loop".

As already mentioned cpu_relax() is very expensive on s390 since it yields()
the current virtual cpu. So we are talking of several thousand cycles.
Considering this enabling the lockless lockref variant would contradict the
intention of the new semantics. And also some quick measurements show
performance regressions of 50% and more.

Simply removing the cpu_relax() call again seems also not very desireable
since Waiman Long reported that for some workloads the call improved
performance by 5%.

Heiko Carstens (3):
  mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef
  lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP()
  s390: enable ARCH_USE_CMPXCHG_LOCKREF

 arch/Kconfig                      |  3 ---
 arch/s390/Kconfig                 |  2 +-
 arch/s390/include/asm/mutex.h     |  2 --
 arch/s390/include/asm/processor.h |  2 ++
 arch/s390/include/asm/spinlock.h  |  5 +++++
 include/linux/mutex.h             |  6 +++---
 lib/lockref.c                     | 10 +++++++++-
 7 files changed, 20 insertions(+), 10 deletions(-)

-- 
1.8.3.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 1/3] mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef
  2013-09-28 10:58 [PATCH v2 0/3] changes to enable lockless lockref on s390 Heiko Carstens
@ 2013-09-28 10:58 ` Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 2/3] lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP() Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 3/3] s390: enable ARCH_USE_CMPXCHG_LOCKREF Heiko Carstens
  2 siblings, 0 replies; 4+ messages in thread
From: Heiko Carstens @ 2013-09-28 10:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Tony Luck, Waiman Long, Martin Schwidefsky, Ingo Molnar,
	linux-kernel, Heiko Carstens

Linus suggested to replace

 #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
 #define arch_mutex_cpu_relax() cpu_relax()
 #endif

with just a simple

  #ifndef arch_mutex_cpu_relax
  # define arch_mutex_cpu_relax() cpu_relax()
  #endif

to get rid of CONFIG_HAVE_CPU_RELAX_SIMPLE. So architectures can
simply define arch_mutex_cpu_relax if they want an architecture
specific function instead of having to add a select statement in
their Kconfig in addition.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/Kconfig                      | 3 ---
 arch/s390/Kconfig                 | 1 -
 arch/s390/include/asm/mutex.h     | 2 --
 arch/s390/include/asm/processor.h | 2 ++
 include/linux/mutex.h             | 6 +++---
 5 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 1feb169..af2cc6e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
-config HAVE_ARCH_MUTEX_CPU_RELAX
-	bool
-
 config HAVE_RCU_TABLE_FREE
 	bool
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index dcc6ac2..d3fa840 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -102,7 +102,6 @@ config S390
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
-	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 688271f..458c1f7 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,5 +7,3 @@
  */
 
 #include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax()	barrier()
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0eb3750..ca7821f 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -198,6 +198,8 @@ static inline void cpu_relax(void)
 	barrier();
 }
 
+#define arch_mutex_cpu_relax()  barrier()
+
 static inline void psw_set_key(unsigned int key)
 {
 	asm volatile("spka 0(%0)" : : "d" (key));
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index ccd4260..bab49da 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -15,8 +15,8 @@
 #include <linux/spinlock_types.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
-
 #include <linux/atomic.h>
+#include <asm/processor.h>
 
 /*
  * Simple, straightforward mutexes with strict semantics:
@@ -175,8 +175,8 @@ extern void mutex_unlock(struct mutex *lock);
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
-#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
-#define arch_mutex_cpu_relax()	cpu_relax()
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
 #endif
 
 #endif
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 2/3] lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP()
  2013-09-28 10:58 [PATCH v2 0/3] changes to enable lockless lockref on s390 Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 1/3] mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef Heiko Carstens
@ 2013-09-28 10:58 ` Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 3/3] s390: enable ARCH_USE_CMPXCHG_LOCKREF Heiko Carstens
  2 siblings, 0 replies; 4+ messages in thread
From: Heiko Carstens @ 2013-09-28 10:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Tony Luck, Waiman Long, Martin Schwidefsky, Ingo Molnar,
	linux-kernel, Heiko Carstens

Make use of arch_mutex_cpu_relax() so architectures can override the
default cpu_relax() semantics.
This is especially useful for s390, where cpu_relax() means that we
yield() the current (virtual) cpu and therefore is very expensive,
and would contradict the whole purpose of the lockless cmpxchg loop.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 lib/lockref.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index e294ae4..6f9d434 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -12,6 +12,14 @@
 #endif
 
 /*
+ * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP.
+ * This is useful for architectures with an expensive cpu_relax().
+ */
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
+/*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
  */
@@ -28,7 +36,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
-		cpu_relax();							\
+		arch_mutex_cpu_relax();						\
 	}									\
 } while (0)
 
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 3/3] s390: enable ARCH_USE_CMPXCHG_LOCKREF
  2013-09-28 10:58 [PATCH v2 0/3] changes to enable lockless lockref on s390 Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 1/3] mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef Heiko Carstens
  2013-09-28 10:58 ` [PATCH v2 2/3] lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP() Heiko Carstens
@ 2013-09-28 10:58 ` Heiko Carstens
  2 siblings, 0 replies; 4+ messages in thread
From: Heiko Carstens @ 2013-09-28 10:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Tony Luck, Waiman Long, Martin Schwidefsky, Ingo Molnar,
	linux-kernel, Heiko Carstens

Enable ARCH_USE_CMPXCHG_LOCKREF since it shows performance improvements
with Linus' simple stat() test case of up to 50% on a 30 cpu system.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/s390/Kconfig                | 1 +
 arch/s390/include/asm/spinlock.h | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d3fa840..7143793 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -93,6 +93,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 701fe8c..83e5d216 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
 extern int arch_spin_trylock_retry(arch_spinlock_t *);
 extern void arch_spin_relax(arch_spinlock_t *lock);
 
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.owner_cpu == 0;
+}
+
 static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
 	int old;
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-09-28 10:58 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-28 10:58 [PATCH v2 0/3] changes to enable lockless lockref on s390 Heiko Carstens
2013-09-28 10:58 ` [PATCH v2 1/3] mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef Heiko Carstens
2013-09-28 10:58 ` [PATCH v2 2/3] lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP() Heiko Carstens
2013-09-28 10:58 ` [PATCH v2 3/3] s390: enable ARCH_USE_CMPXCHG_LOCKREF Heiko Carstens

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).