* [patch 0/3] Allow inlined spinlocks again V4
@ 2009-08-14 12:58 Heiko Carstens
  2009-08-14 12:58 ` [patch 1/3] spinlock: move spinlock function bodies to header file Heiko Carstens
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Heiko Carstens @ 2009-08-14 12:58 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Heiko Carstens, Arnd Bergmann,
	Horst Hartmann, Christian Ehrhardt, Nick Piggin

This patch set allows having inlined spinlocks again.

The rationale behind this is that function calls are expensive, at
least on s390.

Server kernels are usually compiled with !CONFIG_PREEMPT, so a simple
spin_lock is just a compare-and-swap loop; the extra overhead of a
function call is significant by comparison.
With inlined spinlocks overall cpu usage is reduced by 1%-5% on s390.
These numbers were taken with network benchmarks, but I expect any
workload that frequently calls into the kernel and grabs a few locks
to benefit as well.

The implementation is straightforward: move the bodies of the locking
functions into static inline functions and place them in a header file.
By default all locking code remains out-of-line. An architecture can
specify

#define __spin_lock_is_small

in arch/<whatever>/include/asm/spinlock.h to force inlining of a locking
function.
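
For illustration, a minimal sketch of how the opt-in is wired up: the
generic header either maps the API function to its static inline body
or declares the out-of-line variant (names as used in patch 2 of this
series):

	/* arch/<whatever>/include/asm/spinlock.h */
	#define __spin_lock_is_small

	/* include/linux/spinlock_api_smp.h */
	#ifdef __spin_lock_is_small
	#define _spin_lock(lock) __spin_lock(lock)	/* static inline body */
	#else
	void __lockfunc _spin_lock(spinlock_t *lock)	__acquires(lock);
	#endif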

V2: rewritten from scratch - now also with readable code

V3: removed macro to generate out-of-line spinlock variants since that
    would break ctags. As requested by Arnd Bergmann.

V4: allow architectures to specify for each lock/unlock variant whether
    it should be kept out-of-line or inlined.


* [patch 1/3] spinlock: move spinlock function bodies to header file
  2009-08-14 12:58 [patch 0/3] Allow inlined spinlocks again V4 Heiko Carstens
@ 2009-08-14 12:58 ` Heiko Carstens
  2009-08-14 12:58 ` [patch 2/3] spinlock: allow inlined spinlocks Heiko Carstens
  2009-08-14 12:58 ` [patch 3/3] spinlock: inline code for all locking variants on s390 Heiko Carstens
  2 siblings, 0 replies; 17+ messages in thread
From: Heiko Carstens @ 2009-08-14 12:58 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Heiko Carstens, Arnd Bergmann,
	Horst Hartmann, Christian Ehrhardt, Nick Piggin

[-- Attachment #1: 01_spinlock_move.diff --]
[-- Type: text/plain, Size: 17156 bytes --]

From: Heiko Carstens <heiko.carstens@de.ibm.com>

Move the spinlock function bodies to a header file by creating a
static inline version of each variant.
Use the inline version in the out-of-line code. This shouldn't
make any difference except that the spinlock code can now also
be used to generate inlined spinlock code.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 include/linux/spinlock.h         |   18 +-
 include/linux/spinlock_api_smp.h |  263 +++++++++++++++++++++++++++++++++++++++
 kernel/spinlock.c                |  174 ++++---------------------
 3 files changed, 300 insertions(+), 155 deletions(-)

Index: linux-2.6/include/linux/spinlock_api_smp.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_smp.h
+++ linux-2.6/include/linux/spinlock_api_smp.h
@@ -60,4 +60,267 @@ void __lockfunc _read_unlock_irqrestore(
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
 
+static inline int __spin_trylock(spinlock_t *lock)
+{
+	preempt_disable();
+	if (_raw_spin_trylock(lock)) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+		return 1;
+	}
+	preempt_enable();
+	return 0;
+}
+
+static inline int __read_trylock(rwlock_t *lock)
+{
+	preempt_disable();
+	if (_raw_read_trylock(lock)) {
+		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
+		return 1;
+	}
+	preempt_enable();
+	return 0;
+}
+
+static inline int __write_trylock(rwlock_t *lock)
+{
+	preempt_disable();
+	if (_raw_write_trylock(lock)) {
+		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+		return 1;
+	}
+	preempt_enable();
+	return 0;
+}
+
+/*
+ * If lockdep is enabled then we use the non-preemption spin-ops
+ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * not re-enabled during lock-acquire (which the preempt-spin-ops do):
+ */
+#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+
+static inline void __read_lock(rwlock_t *lock)
+{
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+}
+
+static inline unsigned long __spin_lock_irqsave(spinlock_t *lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	/*
+	 * On lockdep we dont want the hand-coded irq-enable of
+	 * _raw_spin_lock_flags() code, because lockdep assumes
+	 * that interrupts are not re-enabled during lock-acquire:
+	 */
+#ifdef CONFIG_LOCKDEP
+	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+#else
+	_raw_spin_lock_flags(lock, &flags);
+#endif
+	return flags;
+}
+
+static inline void __spin_lock_irq(spinlock_t *lock)
+{
+	local_irq_disable();
+	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+}
+
+static inline void __spin_lock_bh(spinlock_t *lock)
+{
+	local_bh_disable();
+	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+}
+
+static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock,
+			     _raw_read_lock_flags, &flags);
+	return flags;
+}
+
+static inline void __read_lock_irq(rwlock_t *lock)
+{
+	local_irq_disable();
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+}
+
+static inline void __read_lock_bh(rwlock_t *lock)
+{
+	local_bh_disable();
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+}
+
+static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock,
+			     _raw_write_lock_flags, &flags);
+	return flags;
+}
+
+static inline void __write_lock_irq(rwlock_t *lock)
+{
+	local_irq_disable();
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+}
+
+static inline void __write_lock_bh(rwlock_t *lock)
+{
+	local_bh_disable();
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+}
+
+static inline void __spin_lock(spinlock_t *lock)
+{
+	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+}
+
+static inline void __write_lock(rwlock_t *lock)
+{
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+}
+
+#endif /* CONFIG_PREEMPT */
+
+static inline void __spin_unlock(spinlock_t *lock)
+{
+	spin_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_spin_unlock(lock);
+	preempt_enable();
+}
+
+static inline void __write_unlock(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	preempt_enable();
+}
+
+static inline void __read_unlock(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	preempt_enable();
+}
+
+static inline void __spin_unlock_irqrestore(spinlock_t *lock,
+					    unsigned long flags)
+{
+	spin_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_spin_unlock(lock);
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
+static inline void __spin_unlock_irq(spinlock_t *lock)
+{
+	spin_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_spin_unlock(lock);
+	local_irq_enable();
+	preempt_enable();
+}
+
+static inline void __spin_unlock_bh(spinlock_t *lock)
+{
+	spin_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_spin_unlock(lock);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+
+static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
+static inline void __read_unlock_irq(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	local_irq_enable();
+	preempt_enable();
+}
+
+static inline void __read_unlock_bh(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+
+static inline void __write_unlock_irqrestore(rwlock_t *lock,
+					     unsigned long flags)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
+static inline void __write_unlock_irq(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	local_irq_enable();
+	preempt_enable();
+}
+
+static inline void __write_unlock_bh(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+
+static inline int __spin_trylock_bh(spinlock_t *lock)
+{
+	local_bh_disable();
+	preempt_disable();
+	if (_raw_spin_trylock(lock)) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+		return 1;
+	}
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+	return 0;
+}
+
 #endif /* __LINUX_SPINLOCK_API_SMP_H */
Index: linux-2.6/include/linux/spinlock.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock.h
+++ linux-2.6/include/linux/spinlock.h
@@ -143,15 +143,6 @@ static inline void smp_mb__after_lock(vo
  */
 #define spin_unlock_wait(lock)	__raw_spin_unlock_wait(&(lock)->raw_lock)
 
-/*
- * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-# include <linux/spinlock_api_smp.h>
-#else
-# include <linux/spinlock_api_up.h>
-#endif
-
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void _raw_spin_lock(spinlock_t *lock);
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
@@ -380,4 +371,13 @@ extern int _atomic_dec_and_lock(atomic_t
  */
 #define spin_can_lock(lock)	(!spin_is_locked(lock))
 
+/*
+ * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+# include <linux/spinlock_api_smp.h>
+#else
+# include <linux/spinlock_api_up.h>
+#endif
+
 #endif /* __LINUX_SPINLOCK_H */
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -23,40 +23,19 @@
 
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
-	preempt_disable();
-	if (_raw_spin_trylock(lock)) {
-		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-		return 1;
-	}
-	
-	preempt_enable();
-	return 0;
+	return __spin_trylock(lock);
 }
 EXPORT_SYMBOL(_spin_trylock);
 
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
-	preempt_disable();
-	if (_raw_read_trylock(lock)) {
-		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
-		return 1;
-	}
-
-	preempt_enable();
-	return 0;
+	return __read_trylock(lock);
 }
 EXPORT_SYMBOL(_read_trylock);
 
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
-	preempt_disable();
-	if (_raw_write_trylock(lock)) {
-		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-		return 1;
-	}
-
-	preempt_enable();
-	return 0;
+	return __write_trylock(lock);
 }
 EXPORT_SYMBOL(_write_trylock);
 
@@ -69,129 +48,74 @@ EXPORT_SYMBOL(_write_trylock);
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+	__read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock);
 
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	preempt_disable();
-	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	/*
-	 * On lockdep we dont want the hand-coded irq-enable of
-	 * _raw_spin_lock_flags() code, because lockdep assumes
-	 * that interrupts are not re-enabled during lock-acquire:
-	 */
-#ifdef CONFIG_LOCKDEP
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
-#else
-	_raw_spin_lock_flags(lock, &flags);
-#endif
-	return flags;
+	return __spin_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irqsave);
 
 void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
-	local_irq_disable();
-	preempt_disable();
-	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	__spin_lock_irq(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irq);
 
 void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
-	local_bh_disable();
-	preempt_disable();
-	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	__spin_lock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_lock_bh);
 
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock,
-			     _raw_read_lock_flags, &flags);
-	return flags;
+	return __read_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_read_lock_irqsave);
 
 void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
-	local_irq_disable();
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+	__read_lock_irq(lock);
 }
 EXPORT_SYMBOL(_read_lock_irq);
 
 void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
-	local_bh_disable();
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+	__read_lock_bh(lock);
 }
 EXPORT_SYMBOL(_read_lock_bh);
 
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock,
-			     _raw_write_lock_flags, &flags);
-	return flags;
+	return __write_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_write_lock_irqsave);
 
 void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
-	local_irq_disable();
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+	__write_lock_irq(lock);
 }
 EXPORT_SYMBOL(_write_lock_irq);
 
 void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
-	local_bh_disable();
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+	__write_lock_bh(lock);
 }
 EXPORT_SYMBOL(_write_lock_bh);
 
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
-	preempt_disable();
-	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	__spin_lock(lock);
 }
-
 EXPORT_SYMBOL(_spin_lock);
 
 void __lockfunc _write_lock(rwlock_t *lock)
 {
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+	__write_lock(lock);
 }
-
 EXPORT_SYMBOL(_write_lock);
 
 #else /* CONFIG_PREEMPT: */
@@ -320,121 +244,79 @@ EXPORT_SYMBOL(_spin_lock_nest_lock);
 
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
-	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
-	preempt_enable();
+	__spin_unlock(lock);
 }
 EXPORT_SYMBOL(_spin_unlock);
 
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	preempt_enable();
+	__write_unlock(lock);
 }
 EXPORT_SYMBOL(_write_unlock);
 
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	preempt_enable();
+	__read_unlock(lock);
 }
 EXPORT_SYMBOL(_read_unlock);
 
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
-	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
-	local_irq_restore(flags);
-	preempt_enable();
+	__spin_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_spin_unlock_irqrestore);
 
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
-	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
-	local_irq_enable();
-	preempt_enable();
+	__spin_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_spin_unlock_irq);
 
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
-	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
-	preempt_enable_no_resched();
-	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+	__spin_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_unlock_bh);
 
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	local_irq_restore(flags);
-	preempt_enable();
+	__read_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_read_unlock_irqrestore);
 
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	local_irq_enable();
-	preempt_enable();
+	__read_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_read_unlock_irq);
 
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	preempt_enable_no_resched();
-	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+	__read_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_read_unlock_bh);
 
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	local_irq_restore(flags);
-	preempt_enable();
+	__write_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_write_unlock_irqrestore);
 
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	local_irq_enable();
-	preempt_enable();
+	__write_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_write_unlock_irq);
 
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	preempt_enable_no_resched();
-	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+	__write_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_write_unlock_bh);
 
 int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
-	local_bh_disable();
-	preempt_disable();
-	if (_raw_spin_trylock(lock)) {
-		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-		return 1;
-	}
-
-	preempt_enable_no_resched();
-	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
-	return 0;
+	return __spin_trylock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_trylock_bh);
 

-- 


* [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-14 12:58 [patch 0/3] Allow inlined spinlocks again V4 Heiko Carstens
  2009-08-14 12:58 ` [patch 1/3] spinlock: move spinlock function bodies to header file Heiko Carstens
@ 2009-08-14 12:58 ` Heiko Carstens
  2009-08-16 17:57   ` Heiko Carstens
  2009-08-14 12:58 ` [patch 3/3] spinlock: inline code for all locking variants on s390 Heiko Carstens
  2 siblings, 1 reply; 17+ messages in thread
From: Heiko Carstens @ 2009-08-14 12:58 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Heiko Carstens, Arnd Bergmann,
	Horst Hartmann, Christian Ehrhardt, Nick Piggin

[-- Attachment #1: 02_spinlock_ifdef.diff --]
[-- Type: text/plain, Size: 11770 bytes --]

From: Heiko Carstens <heiko.carstens@de.ibm.com>

This allows an architecture to specify, per lock variant, whether the
locking code should be kept out-of-line or inlined.

If an architecture wants out-of-line locking code, no change is
needed. To force inlining of e.g. spin_lock(), the line

#define __spin_lock_is_small

needs to be added to arch/<whatever>/include/asm/spinlock.h
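
For illustration, a sketch of the resulting call chain with that define
in place (not part of the patch; it assumes the usual spin_lock() ->
_spin_lock() mapping in include/linux/spinlock.h, and some_lock is just
a placeholder):

	spin_lock(&some_lock);
	/* -> _spin_lock(&some_lock)  via include/linux/spinlock.h          */
	/* -> __spin_lock(&some_lock) macro from spinlock_api_smp.h, since  */
	/*                            __spin_lock_is_small is defined       */
	/* -> static inline body: preempt_disable(), spin_acquire() and     */
	/*    LOCK_CONTENDED()                                              */

	/* Without the define, _spin_lock() remains the out-of-line         */
	/* function in kernel/spinlock.c and the call site is unchanged.    */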

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 include/linux/spinlock_api_smp.h |  146 ++++++++++++++++++++++++++++++++++++++-
 kernel/spinlock.c                |   56 ++++++++++++++
 2 files changed, 199 insertions(+), 3 deletions(-)

Index: linux-2.6/include/linux/spinlock_api_smp.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_smp.h
+++ linux-2.6/include/linux/spinlock_api_smp.h
@@ -19,46 +19,186 @@ int in_lock_functions(unsigned long addr
 
 #define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
 
-void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
 void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
 							__acquires(lock);
 void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *map)
 							__acquires(lock);
+unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+							__acquires(lock);
+
+#ifdef __spin_lock_is_small
+#define _spin_lock(lock) __spin_lock(lock)
+#else
+void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __read_lock_is_small
+#define _read_lock(lock) __read_lock(lock)
+#else
 void __lockfunc _read_lock(rwlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __write_lock_is_small
+#define _write_lock(lock) __write_lock(lock)
+#else
 void __lockfunc _write_lock(rwlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __spin_lock_bh_is_small
+#define _spin_lock_bh(lock) __spin_lock_bh(lock)
+#else
 void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __read_lock_bh_is_small
+#define _read_lock_bh(lock) __read_lock_bh(lock)
+#else
 void __lockfunc _read_lock_bh(rwlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __write_lock_bh_is_small
+#define _write_lock_bh(lock) __write_lock_bh(lock)
+#else
 void __lockfunc _write_lock_bh(rwlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __spin_lock_irq_is_small
+#define _spin_lock_irq(lock) __spin_lock_irq(lock)
+#else
 void __lockfunc _spin_lock_irq(spinlock_t *lock)	__acquires(lock);
+#endif
+
+#ifdef __read_lock_irq_is_small
+#define _read_lock_irq(lock) __read_lock_irq(lock)
+#else
 void __lockfunc _read_lock_irq(rwlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __write_lock_irq_is_small
+#define _write_lock_irq(lock) __write_lock_irq(lock)
+#else
 void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
+#endif
+
+#ifdef __spin_lock_irqsave_is_small
+#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock)
+#else
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 							__acquires(lock);
-unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
-							__acquires(lock);
+#endif
+
+#ifdef __read_lock_irqsave_is_small
+#define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
+#else
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
+#endif
+
+#ifdef __write_lock_irqsave_is_small
+#define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
+#else
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
+#endif
+
+#ifdef __spin_trylock_is_small
+#define _spin_trylock(lock) __spin_trylock(lock)
+#else
 int __lockfunc _spin_trylock(spinlock_t *lock);
+#endif
+
+#ifdef __read_trylock_is_small
+#define _read_trylock(lock) __read_trylock(lock)
+#else
 int __lockfunc _read_trylock(rwlock_t *lock);
+#endif
+
+#ifdef __write_trylock_is_small
+#define _write_trylock(lock) __write_trylock(lock)
+#else
 int __lockfunc _write_trylock(rwlock_t *lock);
+#endif
+
+#ifdef __spin_trylock_bh_is_small
+#define _spin_trylock_bh(lock) __spin_trylock_bh(lock)
+#else
 int __lockfunc _spin_trylock_bh(spinlock_t *lock);
+#endif
+
+#ifdef __spin_unlock_is_small
+#define _spin_unlock(lock) __spin_unlock(lock)
+#else
 void __lockfunc _spin_unlock(spinlock_t *lock)		__releases(lock);
+#endif
+
+#ifdef __read_unlock_is_small
+#define _read_unlock(lock) __read_unlock(lock)
+#else
 void __lockfunc _read_unlock(rwlock_t *lock)		__releases(lock);
+#endif
+
+#ifdef __write_unlock_is_small
+#define _write_unlock(lock) __write_unlock(lock)
+#else
 void __lockfunc _write_unlock(rwlock_t *lock)		__releases(lock);
+#endif
+
+#ifdef __spin_unlock_bh_is_small
+#define _spin_unlock_bh(lock) __spin_unlock_bh(lock)
+#else
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)	__releases(lock);
+#endif
+
+#ifdef __read_unlock_bh_is_small
+#define _read_unlock_bh(lock) __read_unlock_bh(lock)
+#else
 void __lockfunc _read_unlock_bh(rwlock_t *lock)		__releases(lock);
+#endif
+
+#ifdef __write_unlock_bh_is_small
+#define _write_unlock_bh(lock) __write_unlock_bh(lock)
+#else
 void __lockfunc _write_unlock_bh(rwlock_t *lock)	__releases(lock);
+#endif
+
+#ifdef __spin_unlock_irq_is_small
+#define _spin_unlock_irq(lock) __spin_unlock_irq(lock)
+#else
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)	__releases(lock);
+#endif
+
+#ifdef __read_unlock_irq_is_small
+#define _read_unlock_irq(lock) __read_unlock_irq(lock)
+#else
 void __lockfunc _read_unlock_irq(rwlock_t *lock)	__releases(lock);
+#endif
+
+#ifdef __write_unlock_irq_is_small
+#define _write_unlock_irq(lock) __write_unlock_irq(lock)
+#else
 void __lockfunc _write_unlock_irq(rwlock_t *lock)	__releases(lock);
+#endif
+
+#ifdef __spin_unlock_irqrestore_is_small
+#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
+#else
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 							__releases(lock);
+#endif
+
+#ifdef __read_unlock_irqrestore_is_small
+#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
+#else
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
+#endif
+
+#ifdef __write_unlock_irqrestore_is_small
+#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
+#else
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
+#endif
 
 static inline int __spin_trylock(spinlock_t *lock)
 {
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -21,23 +21,29 @@
 #include <linux/debug_locks.h>
 #include <linux/module.h>
 
+#ifndef __spin_trylock_is_small
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	return __spin_trylock(lock);
 }
 EXPORT_SYMBOL(_spin_trylock);
+#endif
 
+#ifndef __read_trylock_is_small
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	return __read_trylock(lock);
 }
 EXPORT_SYMBOL(_read_trylock);
+#endif
 
+#ifndef __write_trylock_is_small
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	return __write_trylock(lock);
 }
 EXPORT_SYMBOL(_write_trylock);
+#endif
 
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
@@ -46,77 +52,101 @@ EXPORT_SYMBOL(_write_trylock);
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
+#ifndef __read_lock_is_small
 void __lockfunc _read_lock(rwlock_t *lock)
 {
 	__read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock);
+#endif
 
+#ifndef __spin_lock_irqsave_is_small
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
 	return __spin_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irqsave);
+#endif
 
+#ifndef __spin_lock_irq_is_small
 void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
 	__spin_lock_irq(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irq);
+#endif
 
+#ifndef __spin_lock_bh_is_small
 void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
 	__spin_lock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_lock_bh);
+#endif
 
+#ifndef __read_lock_irqsave_is_small
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 {
 	return __read_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_read_lock_irqsave);
+#endif
 
+#ifndef __read_lock_irq_is_small
 void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
 	__read_lock_irq(lock);
 }
 EXPORT_SYMBOL(_read_lock_irq);
+#endif
 
+#ifndef __read_lock_bh_is_small
 void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
 	__read_lock_bh(lock);
 }
 EXPORT_SYMBOL(_read_lock_bh);
+#endif
 
+#ifndef __write_lock_irqsave_is_small
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 {
 	return __write_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_write_lock_irqsave);
+#endif
 
+#ifndef __write_lock_irq_is_small
 void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
 	__write_lock_irq(lock);
 }
 EXPORT_SYMBOL(_write_lock_irq);
+#endif
 
+#ifndef __write_lock_bh_is_small
 void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
 	__write_lock_bh(lock);
 }
 EXPORT_SYMBOL(_write_lock_bh);
+#endif
 
+#ifndef __spin_lock_is_small
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	__spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock);
+#endif
 
+#ifndef __write_lock_is_small
 void __lockfunc _write_lock(rwlock_t *lock)
 {
 	__write_lock(lock);
 }
 EXPORT_SYMBOL(_write_lock);
+#endif
 
 #else /* CONFIG_PREEMPT: */
 
@@ -242,83 +272,109 @@ EXPORT_SYMBOL(_spin_lock_nest_lock);
 
 #endif
 
+#ifndef __spin_unlock_is_small
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
 	__spin_unlock(lock);
 }
 EXPORT_SYMBOL(_spin_unlock);
+#endif
 
+#ifndef __write_unlock_is_small
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
 	__write_unlock(lock);
 }
 EXPORT_SYMBOL(_write_unlock);
+#endif
 
+#ifndef __read_unlock_is_small
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
 	__read_unlock(lock);
 }
 EXPORT_SYMBOL(_read_unlock);
+#endif
 
+#ifndef __spin_unlock_irqrestore_is_small
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
 	__spin_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_spin_unlock_irqrestore);
+#endif
 
+#ifndef __spin_unlock_irq_is_small
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
 	__spin_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_spin_unlock_irq);
+#endif
 
+#ifndef __spin_unlock_bh_is_small
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
 	__spin_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_unlock_bh);
+#endif
 
+#ifndef __read_unlock_irqrestore_is_small
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__read_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_read_unlock_irqrestore);
+#endif
 
+#ifndef __read_unlock_irq_is_small
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
 	__read_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_read_unlock_irq);
+#endif
 
+#ifndef __read_unlock_bh_is_small
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
 	__read_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_read_unlock_bh);
+#endif
 
+#ifndef __write_unlock_irqrestore_is_small
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__write_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_write_unlock_irqrestore);
+#endif
 
+#ifndef __write_unlock_irq_is_small
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
 	__write_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_write_unlock_irq);
+#endif
 
+#ifndef __write_unlock_bh_is_small
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
 	__write_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_write_unlock_bh);
+#endif
 
+#ifndef __spin_trylock_bh_is_small
 int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
 	return __spin_trylock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_trylock_bh);
+#endif
 
 notrace int in_lock_functions(unsigned long addr)
 {

-- 


* [patch 3/3] spinlock: inline code for all locking variants on s390
  2009-08-14 12:58 [patch 0/3] Allow inlined spinlocks again V4 Heiko Carstens
  2009-08-14 12:58 ` [patch 1/3] spinlock: move spinlock function bodies to header file Heiko Carstens
  2009-08-14 12:58 ` [patch 2/3] spinlock: allow inlined spinlocks Heiko Carstens
@ 2009-08-14 12:58 ` Heiko Carstens
  2 siblings, 0 replies; 17+ messages in thread
From: Heiko Carstens @ 2009-08-14 12:58 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Heiko Carstens, Arnd Bergmann,
	Horst Hartmann, Christian Ehrhardt, Nick Piggin

[-- Attachment #1: 03_spinlock_s390.diff --]
[-- Type: text/plain, Size: 1851 bytes --]

From: Heiko Carstens <heiko.carstens@de.ibm.com>

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/s390/include/asm/spinlock.h |   34 ++++++++++++++++++++++++++++++++++
 include/linux/spinlock_api_smp.h |    2 +-
 2 files changed, 35 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/s390/include/asm/spinlock.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/spinlock.h
+++ linux-2.6/arch/s390/include/asm/spinlock.h
@@ -191,4 +191,38 @@ static inline int __raw_write_trylock(ra
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
+#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) && \
+    !defined(CONFIG_DEBUG_SPINLOCK)
+
+#define __spin_lock_is_small
+#define __read_lock_is_small
+#define __write_lock_is_small
+#define __spin_lock_bh_is_small
+#define __read_lock_bh_is_small
+#define __write_lock_bh_is_small
+#define __spin_lock_irq_is_small
+#define __read_lock_irq_is_small
+#define __write_lock_irq_is_small
+#define __spin_lock_irqsave_is_small
+#define __read_lock_irqsave_is_small
+#define __write_lock_irqsave_is_small
+#define __spin_trylock_is_small
+#define __read_trylock_is_small
+#define __write_trylock_is_small
+#define __spin_trylock_bh_is_small
+#define __spin_unlock_is_small
+#define __read_unlock_is_small
+#define __write_unlock_is_small
+#define __spin_unlock_bh_is_small
+#define __read_unlock_bh_is_small
+#define __write_unlock_bh_is_small
+#define __spin_unlock_irq_is_small
+#define __read_unlock_irq_is_small
+#define __write_unlock_irq_is_small
+#define __spin_unlock_irqrestore_is_small
+#define __read_unlock_irqrestore_is_small
+#define __write_unlock_irqrestore_is_small
+
+#endif /* defined(SMP) && !defined(PREEMPT) && !defined(DEBUG_SPINLOCK) */
+
 #endif /* __ASM_SPINLOCK_H */

-- 


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-14 12:58 ` [patch 2/3] spinlock: allow inlined spinlocks Heiko Carstens
@ 2009-08-16 17:57   ` Heiko Carstens
  2009-08-16 18:06     ` Ingo Molnar
  2009-08-16 18:22     ` Linus Torvalds
  0 siblings, 2 replies; 17+ messages in thread
From: Heiko Carstens @ 2009-08-16 17:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin

Hi Linus, Andrew,

do you have any objections to the approach below?
Just wondering, since I didn't receive any comments.

On Fri, Aug 14, 2009 at 02:58:03PM +0200, Heiko Carstens wrote:
> From: Heiko Carstens <heiko.carstens@de.ibm.com>
> 
> This allows an architecture to specify, per lock variant, whether the
> locking code should be kept out-of-line or inlined.
> 
> If an architecture wants out-of-line locking code, no change is
> needed. To force inlining of e.g. spin_lock(), the line
> 
> #define __spin_lock_is_small
> 
> needs to be added to arch/<whatever>/include/asm/spinlock.h
> 
> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
> ---
>  include/linux/spinlock_api_smp.h |  146 ++++++++++++++++++++++++++++++++++++++-
>  kernel/spinlock.c                |   56 ++++++++++++++
>  2 files changed, 199 insertions(+), 3 deletions(-)
> 
> Index: linux-2.6/include/linux/spinlock_api_smp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/spinlock_api_smp.h
> +++ linux-2.6/include/linux/spinlock_api_smp.h
> @@ -19,46 +19,186 @@ int in_lock_functions(unsigned long addr
> 
>  #define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
> 
> -void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
>  void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
>  							__acquires(lock);
>  void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *map)
>  							__acquires(lock);
> +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> +							__acquires(lock);
> +
> +#ifdef __spin_lock_is_small
> +#define _spin_lock(lock) __spin_lock(lock)
> +#else
> +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> +#endif

...

> Index: linux-2.6/kernel/spinlock.c
> ===================================================================
> --- linux-2.6.orig/kernel/spinlock.c
> +++ linux-2.6/kernel/spinlock.c

...

> +#ifndef __spin_lock_is_small
>  void __lockfunc _spin_lock(spinlock_t *lock)
>  {
>  	__spin_lock(lock);
>  }
>  EXPORT_SYMBOL(_spin_lock);
> +#endif


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 17:57   ` Heiko Carstens
@ 2009-08-16 18:06     ` Ingo Molnar
  2009-08-16 18:43       ` Linus Torvalds
  2009-08-16 18:44       ` Heiko Carstens
  2009-08-16 18:22     ` Linus Torvalds
  1 sibling, 2 replies; 17+ messages in thread
From: Ingo Molnar @ 2009-08-16 18:06 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Andrew Morton, Linus Torvalds, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin


* Heiko Carstens <heiko.carstens@de.ibm.com> wrote:

> > #define __spin_lock_is_small

> > +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> > +							__acquires(lock);
> > +
> > +#ifdef __spin_lock_is_small
> > +#define _spin_lock(lock) __spin_lock(lock)
> > +#else
> > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > +#endif

Dunno - i'm somewhat wary of introducing a 2^28 variability here. 
(although the number of real variations is much lower - but still).

What's the current situation on s390, precisely which of the 28 lock 
functions are a win to be inlined and which ones are a loss? Do you 
have a list/table perhaps?

	Ingo


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 17:57   ` Heiko Carstens
  2009-08-16 18:06     ` Ingo Molnar
@ 2009-08-16 18:22     ` Linus Torvalds
  2009-08-17 15:46       ` Heiko Carstens
  1 sibling, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2009-08-16 18:22 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Andrew Morton, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin



On Sun, 16 Aug 2009, Heiko Carstens wrote:
> 
> do you have any objections to the approach below?
> Just wondering, since I didn't receive any comments.

Other things going on, but here goes:

> > +#ifdef __spin_lock_is_small
> > +#define _spin_lock(lock) __spin_lock(lock)
> > +#else
> > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > +#endif

The above repeats many times, and I found that irritating. I think you 
should be able to get rid of a _lot_ of that by doing it like this instead

	extern void __lockfunc _spin_lock(spinlock_t *lock) __acquires(lock);
	... plain declarations for all cases, no #ifdef's ...

and then at the end you just do

	#ifdef __spin_lock_is_small
	  #define _spin_lock(lock) __spin_lock(lock)         
	#endif
	..

and at least make that repeating section be composed of simpler and 
smaller entries.

Now, I actually have a disgusting way to avoid the #ifdef's entirely, but 
I'm not sure it's worth it. You'd do the same unconditional function 
declarations, but then we can also do some unconditional CPP expansions:

	#define __define2_lock(name,extent) name##extent
	#define __define_lock(name,extent) __define2_lock(name,extent)  
	#define define_lock(name) __define_lock(__##name, __##name##_is_small)

	// for each lock type
	#define __spin_lock__spin_lock_is_small _spin_lock  
	#define _spin_lock define_lock(spin_lock)

and what happens is that if '__spin_lock_is_small' is undefined, you end 
up having '_spin_lock()' expand to '__spin_lock__spin_lock_is_small()' and 
then back to _spin_lock, but if it's #defined (to empty), it gets defined 
to __spin_lock() instead.

No ifdefs, just two unconditional #define lines for each lock type.

Ugly? Yes. Less ugly than having variations of that #ifdef repeated many 
times? I dunno.
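
To make the two-step expansion concrete, here is a trace of what the
preprocessor would do with a call site (hypothetical, just following
the macros above):

	_spin_lock(lock);
	/* _spin_lock -> define_lock(spin_lock)                             */
	/*            -> __define_lock(__spin_lock, __spin_lock_is_small)   */
	/* __define_lock expands its arguments before __define2_lock        */
	/* pastes them together:                                            */
	/*  - __spin_lock_is_small undefined: the token survives, pasting   */
	/*    yields __spin_lock__spin_lock_is_small -> _spin_lock, which   */
	/*    is not re-expanded (it is already being expanded), so the     */
	/*    call stays _spin_lock(lock), the out-of-line function.        */
	/*  - __spin_lock_is_small defined (empty): pasting yields          */
	/*    __spin_lock, so the call becomes __spin_lock(lock), the       */
	/*    static inline.                                                */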

			Linus


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 18:06     ` Ingo Molnar
@ 2009-08-16 18:43       ` Linus Torvalds
  2009-08-16 20:24         ` Ingo Molnar
  2009-08-16 18:44       ` Heiko Carstens
  1 sibling, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2009-08-16 18:43 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Heiko Carstens, Andrew Morton, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin



On Sun, 16 Aug 2009, Ingo Molnar wrote:
> 
> What's the current situation on s390, precisely which of the 28 lock 
> functions are a win to be inlined and which ones are a loss? Do you 
> have a list/table perhaps?

Let's look at x86 instead. 

The one I can _guarantee_ is worth inlining is "spin_unlock()", since it 
just generates a single "incb %m" or whatever. No loops, no conditionals, 
no nuthing. It's not even a locked instruction. Right now we literally 
generate this function for it:

	0xffffffff81420d74 <_spin_unlock+0>:	push   %rbp
	0xffffffff81420d75 <_spin_unlock+1>:	mov    %rsp,%rbp
	0xffffffff81420d78 <_spin_unlock+4>:	incb   (%rdi)
	0xffffffff81420d7a <_spin_unlock+6>:	leaveq 
	0xffffffff81420d7b <_spin_unlock+7>:	retq   

iow, the actual "bulk" of that function is a single two-byte instruction. 
And for that we generate a whole 5-byte "call" instruction, along with all 
the costs of fixed register scheduling and stupid spilling etc.

read_unlock and write_unlock are similar, and are

	lock incl (%rdi)	// 3 bytes

and

	lock addl $0x1000000,(%rdi)	// 7 bytes

respectively. At 7 bytes, write_unlock() is still likely to be smaller 
inlined (because you avoid the register games).

Other cases on x86 that would be smaller in-lined:

 - _spin_unlock_irq: 3 bytes

	incb   (%rdi)
	sti

 - _spin_unlock_irqrestore: 4 bytes

	incb   (%rdi)
	push   %rsi
	popfq

 - _read_unlock_irq/_read_unlock_irqrestore (4 and 5 bytes respectively):

	lock incl (%rdi)
	sti / push+popfq

but not, for example, any of the locking functions, nor any of the "_bh" 
versions (because local_bh_enable ends up pretty complicated, unlike 
local_bh_disable). Nor even perhaps

 - _write_unlock_irqrestore: (9 bytes)

	lock addl $0x1000000,(%rdi)
	push   %rsi
	popfq

which is starting to get to the point where a call _may_ be smaller 
(largely due to that big constant).

And '_spin_lock()' is already too big to inline:

	mov	$0x100,%eax
	lock xadd %ax,(%rdi)
	cmp	%ah,%al
	je	2f
	pause
	mov	(%rdi),%al
	je	1b

which is 20 bytes or so, and that's the simplest of the locking cases. So 
you really do have a mix of "want to inline" and "do not want to inline".

			Linus


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 18:06     ` Ingo Molnar
  2009-08-16 18:43       ` Linus Torvalds
@ 2009-08-16 18:44       ` Heiko Carstens
  2009-08-16 20:48         ` Ingo Molnar
  1 sibling, 1 reply; 17+ messages in thread
From: Heiko Carstens @ 2009-08-16 18:44 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Linus Torvalds, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin

On Sun, Aug 16, 2009 at 08:06:31PM +0200, Ingo Molnar wrote:
> * Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
> > > #define __spin_lock_is_small
> 
> > > +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> > > +							__acquires(lock);
> > > +
> > > +#ifdef __spin_lock_is_small
> > > +#define _spin_lock(lock) __spin_lock(lock)
> > > +#else
> > > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > > +#endif
> 
> Dunno - i'm somewhat wary of introducing a 2^28 variability here. 
> (although the number of real variations is much lower - but still).
> 
> What's the current situation on s390, precisely which of the 28 lock 
> functions are a win to be inlined and which ones are a loss? Do you 
> have a list/table perhaps?

No list unfortunately. However, the variants we really care about are
only the spin_locks. The *try_lock variants are also not that important.
So we end up with eight ifdefs (spin_lock/bh/irq/irq_save + unlock).
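
For the case we care about on s390, that would boil down to roughly
the following in arch/s390/include/asm/spinlock.h (a sketch of the
reduced set, using the *_is_small convention from this series):

	#define __spin_lock_is_small
	#define __spin_lock_bh_is_small
	#define __spin_lock_irq_is_small
	#define __spin_lock_irqsave_is_small
	#define __spin_unlock_is_small
	#define __spin_unlock_bh_is_small
	#define __spin_unlock_irq_is_small
	#define __spin_unlock_irqrestore_is_small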

I'll change the patches according to Linus' comments and send them
again.


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 18:43       ` Linus Torvalds
@ 2009-08-16 20:24         ` Ingo Molnar
  2009-08-16 21:07           ` Linus Torvalds
  0 siblings, 1 reply; 17+ messages in thread
From: Ingo Molnar @ 2009-08-16 20:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Heiko Carstens, Andrew Morton, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sun, 16 Aug 2009, Ingo Molnar wrote:
> > 
> > What's the current situation on s390, precisely which of the 28 lock 
> > functions are a win to be inlined and which ones are a loss? Do you 
> > have a list/table perhaps?
> 
> Let's look at x86 instead. 
> 
> The one I can _guarantee_ is worth inlining is "spin_unlock()", 
> since it just generates a single "incb %m" or whatever. [...]

We do that already for that reason:

/*
 * We inline the unlock functions in the nondebug case:
 */
#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
        !defined(CONFIG_SMP)
# define spin_unlock(lock)              _spin_unlock(lock)
# define read_unlock(lock)              _read_unlock(lock)
# define write_unlock(lock)             _write_unlock(lock)
# define spin_unlock_irq(lock)          _spin_unlock_irq(lock)
# define read_unlock_irq(lock)          _read_unlock_irq(lock)
# define write_unlock_irq(lock)         _write_unlock_irq(lock)

this works fine here:

 ffffffff8103d359 <finish_task_switch>:
 ...
 ffffffff8103d393:	fe 03                	incb   (%rbx)
 ffffffff8103d395:	fb                   	sti    

that's an inlined spin_unlock_irq() in finish_task_switch().

We dont do this if DEBUG_SPINLOCK is defined (which bloats the 
function) or if CONFIG_PREEMPT is defined.

Maybe the latter case could be revised although generally it's not 
worth inlining functions that also touch task-struct / threadinfo.

	Ingo


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 18:44       ` Heiko Carstens
@ 2009-08-16 20:48         ` Ingo Molnar
  2009-08-16 21:33           ` Heiko Carstens
  0 siblings, 1 reply; 17+ messages in thread
From: Ingo Molnar @ 2009-08-16 20:48 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Andrew Morton, Linus Torvalds, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin


* Heiko Carstens <heiko.carstens@de.ibm.com> wrote:

> On Sun, Aug 16, 2009 at 08:06:31PM +0200, Ingo Molnar wrote:
> > * Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
> > > > #define __spin_lock_is_small
> > 
> > > > +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> > > > +							__acquires(lock);
> > > > +
> > > > +#ifdef __spin_lock_is_small
> > > > +#define _spin_lock(lock) __spin_lock(lock)
> > > > +#else
> > > > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > > > +#endif
> > 
> > Dunno - i'm somewhat wary of introducing a 2^28 variability here. 
> > (although the number of real variations is much lower - but still).
> > 
> > What's the current situation on s390, precisely which of the 28 lock 
> > functions are a win to be inlined and which ones are a loss? Do you 
> > have a list/table perhaps?
> 
> No list unfortunately. [...]

Well, if you dont know the functions you want to inline, how will 
you make intelligent use of this facility then in s390?

Btw., i just noticed that s390 has CONFIG_PREEMPT turned on by 
default in its defconfig. Have you made your measurements with 
CONFIG_PREEMPT? If yes then the current inlining rules in spinlock.h 
will turn all the locking APIs into functions.

Could you perhaps re-try your measurements with CONFIG_PREEMPT 
turned off? I suspect you'll see different behavior.

	Ingo


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 20:24         ` Ingo Molnar
@ 2009-08-16 21:07           ` Linus Torvalds
  2009-08-16 21:18             ` Ingo Molnar
  0 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2009-08-16 21:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Heiko Carstens, Andrew Morton, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin



On Sun, 16 Aug 2009, Ingo Molnar wrote:
> 
> We do that already for that reason:

Ahh, we do. I was misled by the fact that we still generate the 
out-of-line ones, even though we don't use them. Which is fine, of course.

It doesn't do the irqrestore versions, but that's probably not a big deal.

			Linus


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 21:07           ` Linus Torvalds
@ 2009-08-16 21:18             ` Ingo Molnar
  0 siblings, 0 replies; 17+ messages in thread
From: Ingo Molnar @ 2009-08-16 21:18 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Heiko Carstens, Andrew Morton, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sun, 16 Aug 2009, Ingo Molnar wrote:
> > 
> > We do that already for that reason:
> 
> Ahh, we do. I was mislead by the fact that we still generate the 
> out-of-line ones, even though we don't use them. Which is fine, of 
> course.
> 
> It doesn't do the irqrestore versions, but that's probably not a 
> big deal.

i remember having done gradual builds and turning the inlining of 
various locking functions on/off to see how it impacts the text 
size, so we were quite close to the optimal setup ... then. (two 
years ago?)

But ... the fact that you didnt notice it shows that the inlining 
decisions in spinlock.h are less than transparent and not really 
cleanly done.

Even if we dont change a thing on the x86 side the s390 observations 
are obviously valid, and Heiko's generalization looks more 
maintainable in terms of tweakability, as it's per function granular 
and per arch as well.

OTOH, it needs some work: the CONFIG_PREEMPT and 
CONFIG_DEBUG_SPINLOCK dependency should be expressed too - and i 
think CONFIG_PREEMPT is probably what bit s390.

And at that point we've got 2^3 * 2^28 * 20 
(configs*functions*archs) variants, and i feel a bit uneasy about 
that kind of complexity (even if the typical cases will be much 
simpler than that). We wont really get it fully right, we'll just 
drown in that.

Dammit, why cannot the compiler get this right, working it around in 
the kernel source makes things so ugly :-(

It's not rocket science to do a good inliner but IMO GCC messed 
their design up on day one by separating the assembler from the C 
compiler, hence their inliner cannot really know about how large 
instructions are. Rather stupid IMHO.

	Ingo


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 20:48         ` Ingo Molnar
@ 2009-08-16 21:33           ` Heiko Carstens
  2009-08-16 21:36             ` Ingo Molnar
  0 siblings, 1 reply; 17+ messages in thread
From: Heiko Carstens @ 2009-08-16 21:33 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Linus Torvalds, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin

On Sun, Aug 16, 2009 at 10:48:31PM +0200, Ingo Molnar wrote:
> * Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
> > On Sun, Aug 16, 2009 at 08:06:31PM +0200, Ingo Molnar wrote:
> > > * Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
> > > > > #define __spin_lock_is_small
> > > 
> > > > > +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> > > > > +							__acquires(lock);
> > > > > +
> > > > > +#ifdef __spin_lock_is_small
> > > > > +#define _spin_lock(lock) __spin_lock(lock)
> > > > > +#else
> > > > > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > > > > +#endif
> > > 
> > > Dunno - i'm somewhat wary of introducing a 2^28 variability here. 
> > > (although the number of real variations is much lower - but still).
> > > 
> > > What's the current situation on s390, precisely which of the 28 lock 
> > > functions are a win to be inlined and which ones are a loss? Do you 
> > > have a list/table perhaps?
> > 
> > No list unfortunately. [...]
> 
> Well, if you dont know the functions you want to inline, how will
> you make intelligent use of this facility then in s390?

What I tried to say: in general we want to have all locking functions inlined.
This is because all our measurements show that if anything gets inlined,
performance improves. This is even true for large functions.

> Btw., i just noticed that s390 has CONFIG_PREEMPT turned on by 
> default in its defconfig. Have you made your measurements with 
> CONFIG_PREEMPT? If yes then the current inlining rules in spinlock.h 
> will turn all the locking APIs into functions.

All measurements were done with CONFIG_PREEMPT off.


* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 21:33           ` Heiko Carstens
@ 2009-08-16 21:36             ` Ingo Molnar
  0 siblings, 0 replies; 17+ messages in thread
From: Ingo Molnar @ 2009-08-16 21:36 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Andrew Morton, Linus Torvalds, Peter Zijlstra, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin


* Heiko Carstens <heiko.carstens@de.ibm.com> wrote:

> On Sun, Aug 16, 2009 at 10:48:31PM +0200, Ingo Molnar wrote:
> > * Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
> > > On Sun, Aug 16, 2009 at 08:06:31PM +0200, Ingo Molnar wrote:
> > > > * Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
> > > > > > #define __spin_lock_is_small
> > > > 
> > > > > > +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> > > > > > +							__acquires(lock);
> > > > > > +
> > > > > > +#ifdef __spin_lock_is_small
> > > > > > +#define _spin_lock(lock) __spin_lock(lock)
> > > > > > +#else
> > > > > > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > > > > > +#endif
> > > > 
> > > > Dunno - i'm somewhat wary of introducing a 2^28 variability here. 
> > > > (although the number of real variations is much lower - but still).
> > > > 
> > > > What's the current situation on s390, precisely which of the 28 lock 
> > > > functions are a win to be inlined and which ones are a loss? Do you 
> > > > have a list/table perhaps?
> > > 
> > > No list unfortunately. [...]
> > 
> > Well, if you dont know the functions you want to inline, how 
> > will you make intelligent use of this facility then in s390?
> 
> What I tried to say: in general we want to have all locking 
> functions inlined. This is because all our measurements show if 
> anything gets inlined performance improves. This is even true for 
> large functions.
> 
> > Btw., i just noticed that s390 has CONFIG_PREEMPT turned on by 
> > default in its defconfig. Have you made your measurements with 
> > CONFIG_PREEMPT? If yes then the current inlining rules in 
> > spinlock.h will turn all the locking APIs into functions.
> 
> All measurements were done with CONFIG_PREEMPT off.

ok.

( Btw., regardless of this patch-set, i suspect you _really_ want
  compiler help for that - one with global scope, which can inline
  even larger functions, even if they were declared global in the
  kernel. )

	Ingo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-16 18:22     ` Linus Torvalds
@ 2009-08-17 15:46       ` Heiko Carstens
  0 siblings, 0 replies; 17+ messages in thread
From: Heiko Carstens @ 2009-08-17 15:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Arnd Bergmann, Horst Hartmann,
	Christian Ehrhardt, Nick Piggin

On Sun, Aug 16, 2009 at 11:22:45AM -0700, Linus Torvalds wrote:
> Other things going on, but here goes:
> > > +#ifdef __spin_lock_is_small
> > > +#define _spin_lock(lock) __spin_lock(lock)
> > > +#else
> > > +void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
> > > +#endif
> 
> The above repeats many times, and I found that irritating. I think you 
> should be able to get rid of a _lot_ of that by doing it like this instead
> 
> 	extern void __lockfunc _spin_lock(spinlock_t *lock) __acquires(lock);
> 	... plain declarations for all cases, no #ifdef's ...
> 
> and then at the end you just do
> 
> 	#ifdef __spin_lock_is_small
> 	  #define _spin_lock(lock) __spin_lock(lock)         
> 	#endif
> 	..
> 
> and at least make that repeating section be composed of simpler and 
> smaller entries.

Well, I decided to go for the ifdef variant, because that makes it easy to
ifdef all functions in kernel/spinlock.c:

With

#ifdef __spin_lock_is_small
#define _spin_lock(lock) __spin_lock(lock)         
#endif

we can do this:

+#ifndef _spin_lock
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	__spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock);
+#endif

In addition, this allows architecture settings to be overridden by adding
ifdefs in one common place, instead of requiring each architecture to get it
right:

+#ifndef CONFIG_DEBUG_SPINLOCK
+#ifndef CONFIG_GENERIC_LOCKBREAK
+
+#ifdef __spin_lock_is_small
+#define _spin_lock(lock) __spin_lock(lock)
+#endif
+ [...]

This should also address Ingo's concerns.

Does this look ok now?

---
 include/linux/spinlock_api_smp.h |  119 +++++++++++++++++++++++++++++++++++++++
 kernel/spinlock.c                |   56 ++++++++++++++++++
 2 files changed, 175 insertions(+)

Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -21,23 +21,29 @@
 #include <linux/debug_locks.h>
 #include <linux/module.h>
 
+#ifndef _spin_trylock
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	return __spin_trylock(lock);
 }
 EXPORT_SYMBOL(_spin_trylock);
+#endif
 
+#ifndef _read_trylock
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	return __read_trylock(lock);
 }
 EXPORT_SYMBOL(_read_trylock);
+#endif
 
+#ifndef _write_trylock
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	return __write_trylock(lock);
 }
 EXPORT_SYMBOL(_write_trylock);
+#endif
 
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
@@ -46,77 +52,101 @@ EXPORT_SYMBOL(_write_trylock);
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
+#ifndef _read_lock
 void __lockfunc _read_lock(rwlock_t *lock)
 {
 	__read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock);
+#endif
 
+#ifndef _spin_lock_irqsave
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
 	return __spin_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irqsave);
+#endif
 
+#ifndef _spin_lock_irq
 void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
 	__spin_lock_irq(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irq);
+#endif
 
+#ifndef _spin_lock_bh
 void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
 	__spin_lock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_lock_bh);
+#endif
 
+#ifndef _read_lock_irqsave
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 {
 	return __read_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_read_lock_irqsave);
+#endif
 
+#ifndef _read_lock_irq
 void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
 	__read_lock_irq(lock);
 }
 EXPORT_SYMBOL(_read_lock_irq);
+#endif
 
+#ifndef _read_lock_bh
 void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
 	__read_lock_bh(lock);
 }
 EXPORT_SYMBOL(_read_lock_bh);
+#endif
 
+#ifndef _write_lock_irqsave
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 {
 	return __write_lock_irqsave(lock);
 }
 EXPORT_SYMBOL(_write_lock_irqsave);
+#endif
 
+#ifndef _write_lock_irq
 void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
 	__write_lock_irq(lock);
 }
 EXPORT_SYMBOL(_write_lock_irq);
+#endif
 
+#ifndef _write_lock_bh
 void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
 	__write_lock_bh(lock);
 }
 EXPORT_SYMBOL(_write_lock_bh);
+#endif
 
+#ifndef _spin_lock
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	__spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock);
+#endif
 
+#ifndef _write_lock
 void __lockfunc _write_lock(rwlock_t *lock)
 {
 	__write_lock(lock);
 }
 EXPORT_SYMBOL(_write_lock);
+#endif
 
 #else /* CONFIG_PREEMPT: */
 
@@ -242,83 +272,109 @@ EXPORT_SYMBOL(_spin_lock_nest_lock);
 
 #endif
 
+#ifndef _spin_unlock
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
 	__spin_unlock(lock);
 }
 EXPORT_SYMBOL(_spin_unlock);
+#endif
 
+#ifndef _write_unlock
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
 	__write_unlock(lock);
 }
 EXPORT_SYMBOL(_write_unlock);
+#endif
 
+#ifndef _read_unlock
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
 	__read_unlock(lock);
 }
 EXPORT_SYMBOL(_read_unlock);
+#endif
 
+#ifndef _spin_unlock_irqrestore
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
 	__spin_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_spin_unlock_irqrestore);
+#endif
 
+#ifndef _spin_unlock_irq
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
 	__spin_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_spin_unlock_irq);
+#endif
 
+#ifndef _spin_unlock_bh
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
 	__spin_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_unlock_bh);
+#endif
 
+#ifndef _read_unlock_irqrestore
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__read_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_read_unlock_irqrestore);
+#endif
 
+#ifndef _read_unlock_irq
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
 	__read_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_read_unlock_irq);
+#endif
 
+#ifndef _read_unlock_bh
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
 	__read_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_read_unlock_bh);
+#endif
 
+#ifndef _write_unlock_irqrestore
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__write_unlock_irqrestore(lock, flags);
 }
 EXPORT_SYMBOL(_write_unlock_irqrestore);
+#endif
 
+#ifndef _write_unlock_irq
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
 	__write_unlock_irq(lock);
 }
 EXPORT_SYMBOL(_write_unlock_irq);
+#endif
 
+#ifndef _write_unlock_bh
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
 	__write_unlock_bh(lock);
 }
 EXPORT_SYMBOL(_write_unlock_bh);
+#endif
 
+#ifndef _spin_trylock_bh
 int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
 	return __spin_trylock_bh(lock);
 }
 EXPORT_SYMBOL(_spin_trylock_bh);
+#endif
 
 notrace int in_lock_functions(unsigned long addr)
 {
Index: linux-2.6/include/linux/spinlock_api_smp.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_smp.h
+++ linux-2.6/include/linux/spinlock_api_smp.h
@@ -60,6 +60,125 @@ void __lockfunc _read_unlock_irqrestore(
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
 
+#ifndef CONFIG_DEBUG_SPINLOCK
+#ifndef CONFIG_GENERIC_LOCKBREAK
+
+#ifdef __spin_lock_is_small
+#define _spin_lock(lock) __spin_lock(lock)
+#endif
+
+#ifdef __read_lock_is_small
+#define _read_lock(lock) __read_lock(lock)
+#endif
+
+#ifdef __write_lock_is_small
+#define _write_lock(lock) __write_lock(lock)
+#endif
+
+#ifdef __spin_lock_bh_is_small
+#define _spin_lock_bh(lock) __spin_lock_bh(lock)
+#endif
+
+#ifdef __read_lock_bh_is_small
+#define _read_lock_bh(lock) __read_lock_bh(lock)
+#endif
+
+#ifdef __write_lock_bh_is_small
+#define _write_lock_bh(lock) __write_lock_bh(lock)
+#endif
+
+#ifdef __spin_lock_irq_is_small
+#define _spin_lock_irq(lock) __spin_lock_irq(lock)
+#endif
+
+#ifdef __read_lock_irq_is_small
+#define _read_lock_irq(lock) __read_lock_irq(lock)
+#endif
+
+#ifdef __write_lock_irq_is_small
+#define _write_lock_irq(lock) __write_lock_irq(lock)
+#endif
+
+#ifdef __spin_lock_irqsave_is_small
+#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock)
+#endif
+
+#ifdef __read_lock_irqsave_is_small
+#define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
+#endif
+
+#ifdef __write_lock_irqsave_is_small
+#define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
+#endif
+
+#endif /* !CONFIG_GENERIC_LOCKBREAK */
+
+#ifdef __spin_trylock_is_small
+#define _spin_trylock(lock) __spin_trylock(lock)
+#endif
+
+#ifdef __read_trylock_is_small
+#define _read_trylock(lock) __read_trylock(lock)
+#endif
+
+#ifdef __write_trylock_is_small
+#define _write_trylock(lock) __write_trylock(lock)
+#endif
+
+#ifdef __spin_trylock_bh_is_small
+#define _spin_trylock_bh(lock) __spin_trylock_bh(lock)
+#endif
+
+#ifdef __spin_unlock_is_small
+#define _spin_unlock(lock) __spin_unlock(lock)
+#endif
+
+#ifdef __read_unlock_is_small
+#define _read_unlock(lock) __read_unlock(lock)
+#endif
+
+#ifdef __write_unlock_is_small
+#define _write_unlock(lock) __write_unlock(lock)
+#endif
+
+#ifdef __spin_unlock_bh_is_small
+#define _spin_unlock_bh(lock) __spin_unlock_bh(lock)
+#endif
+
+#ifdef __read_unlock_bh_is_small
+#define _read_unlock_bh(lock) __read_unlock_bh(lock)
+#endif
+
+#ifdef __write_unlock_bh_is_small
+#define _write_unlock_bh(lock) __write_unlock_bh(lock)
+#endif
+
+#ifdef __spin_unlock_irq_is_small
+#define _spin_unlock_irq(lock) __spin_unlock_irq(lock)
+#endif
+
+#ifdef __read_unlock_irq_is_small
+#define _read_unlock_irq(lock) __read_unlock_irq(lock)
+#endif
+
+#ifdef __write_unlock_irq_is_small
+#define _write_unlock_irq(lock) __write_unlock_irq(lock)
+#endif
+
+#ifdef __spin_unlock_irqrestore_is_small
+#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
+#endif
+
+#ifdef __read_unlock_irqrestore_is_small
+#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
+#endif
+
+#ifdef __write_unlock_irqrestore_is_small
+#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
+#endif
+
+#endif /* CONFIG_DEBUG_SPINLOCK */
+
 static inline int __spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
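
For reference, the opt-in side of this scheme would be a handful of defines in
the architecture's spinlock header. The snippet below is only an illustrative
sketch of that usage, not the actual s390 change (that is patch 3/3), and it
assumes the defines are visible before the generic spinlock headers are
processed:

/*
 * Sketch only: an architecture that wants, say, spin_lock()/spin_unlock()
 * inlined while keeping everything else out of line.
 */
#define __spin_lock_is_small
#define __spin_unlock_is_small

/*
 * With CONFIG_DEBUG_SPINLOCK and CONFIG_GENERIC_LOCKBREAK both disabled,
 * spinlock_api_smp.h then maps _spin_lock()/_spin_unlock() to the static
 * inline __spin_lock()/__spin_unlock(), and the matching out-of-line
 * wrappers in kernel/spinlock.c are compiled out by the
 * #ifndef _spin_lock / #ifndef _spin_unlock guards above.
 */

The architecture only names the variants it considers small enough to inline;
whether the configuration actually permits inlining them is still decided in
the common code, which is the point of keeping the ifdefs in one place.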

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [patch 2/3] spinlock: allow inlined spinlocks
  2009-08-12 18:39 [patch 0/3] Allow inlined spinlocks again V3 Heiko Carstens
@ 2009-08-12 18:39 ` Heiko Carstens
  0 siblings, 0 replies; 17+ messages in thread
From: Heiko Carstens @ 2009-08-12 18:39 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, linux-arch,
	Martin Schwidefsky, Heiko Carstens, Arnd Bergmann,
	Horst Hartmann, Christian Ehrhardt

[-- Attachment #1: 02_spinlock_inline.diff --]
[-- Type: text/plain, Size: 4678 bytes --]

From: Heiko Carstens <heiko.carstens@de.ibm.com>

Add new config option SPINLOCK_INLINE and some defines which depend
on it in order to generate inlined spinlock code instead of out-of-line
code.
Avoiding function calls for spinlocks gives 1%-5% less cpu usage on
network benchmarks on s390.

Architectures must select HAVE_SPINLOCK_INLINE_SUPPORT to enable this
config option.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 include/linux/spinlock_api_smp.h |   35 +++++++++++++++++++++++++++++++++++
 kernel/spinlock.c                |    4 ++++
 lib/Kconfig.debug                |   14 ++++++++++++++
 3 files changed, 53 insertions(+)

Index: linux-2.6/include/linux/spinlock_api_smp.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_smp.h
+++ linux-2.6/include/linux/spinlock_api_smp.h
@@ -19,6 +19,8 @@ int in_lock_functions(unsigned long addr
 
 #define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
 
+#ifndef CONFIG_SPINLOCK_INLINE
+
 void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
 void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
 							__acquires(lock);
@@ -60,6 +62,39 @@ void __lockfunc _read_unlock_irqrestore(
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
 
+#else /* CONFIG_SPINLOCK_INLINE */
+
+#define _spin_trylock(lock)	  __spin_trylock(lock)
+#define _read_trylock(lock)	  __read_trylock(lock)
+#define _write_trylock(lock)	  __write_trylock(lock)
+#define _read_lock(lock)	  __read_lock(lock)
+#define _spin_lock_irqsave(lock)  __spin_lock_irqsave(lock)
+#define _spin_lock_irq(lock)	  __spin_lock_irq(lock)
+#define _spin_lock_bh(lock)	  __spin_lock_bh(lock)
+#define _read_lock_irqsave(lock)  __read_lock_irqsave(lock)
+#define _read_lock_irq(lock)	  __read_lock_irq(lock)
+#define _read_lock_bh(lock)	  __read_lock_bh(lock)
+#define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
+#define _write_lock_irq(lock)	  __write_lock_irq(lock)
+#define _write_lock_bh(lock)	  __write_lock_bh(lock)
+#define _spin_lock(lock)	  __spin_lock(lock)
+#define _write_lock(lock)	  __write_lock(lock)
+#define _spin_unlock(lock)	  __spin_unlock(lock)
+#define _write_unlock(lock)	  __write_unlock(lock)
+#define _read_unlock(lock)	  __read_unlock(lock)
+#define _spin_unlock_irq(lock)	  __spin_unlock_irq(lock)
+#define _spin_unlock_bh(lock)	  __spin_unlock_bh(lock)
+#define _read_unlock_irq(lock)	  __read_unlock_irq(lock)
+#define _read_unlock_bh(lock)	  __read_unlock_bh(lock)
+#define _write_unlock_irq(lock)	  __write_unlock_irq(lock)
+#define _write_unlock_bh(lock)	  __write_unlock_bh(lock)
+#define _spin_trylock_bh(lock)	  __spin_trylock_bh(lock)
+#define _spin_unlock_irqrestore(lock, flags)  __spin_unlock_irqrestore(lock, flags)
+#define _read_unlock_irqrestore(lock, flags)  __read_unlock_irqrestore(lock, flags)
+#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
+
+#endif /* CONFIG_SPINLOCK_INLINE */
+
 static inline int __spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
Index: linux-2.6/lib/Kconfig.debug
===================================================================
--- linux-2.6.orig/lib/Kconfig.debug
+++ linux-2.6/lib/Kconfig.debug
@@ -879,6 +879,20 @@ config SYSCTL_SYSCALL_CHECK
 	  to properly maintain and use. This enables checks that help
 	  you to keep things correct.
 
+config HAVE_SPINLOCK_INLINE_SUPPORT
+	bool
+
+config SPINLOCK_INLINE
+	bool "Inline spinlock code"
+	depends on HAVE_SPINLOCK_INLINE_SUPPORT
+	depends on !DEBUG_SPINLOCK
+	depends on SMP && !PREEMPT
+	help
+	  Select this option if you want to have inlined spinlock code instead
+	  of an out of line implementation.
+	  This will generate a larger kernel image. On some architectures this
+	  increases performance.
+
 source mm/Kconfig.debug
 source kernel/trace/Kconfig
 
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -21,6 +21,8 @@
 #include <linux/debug_locks.h>
 #include <linux/module.h>
 
+#ifndef CONFIG_SPINLOCK_INLINE
+
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	return __spin_trylock(lock);
@@ -320,6 +322,8 @@ int __lockfunc _spin_trylock_bh(spinlock
 }
 EXPORT_SYMBOL(_spin_trylock_bh);
 
+#endif /* CONFIG_SPINLOCK_INLINE */
+
 notrace int in_lock_functions(unsigned long addr)
 {
 	/* Linker adds these: start and end of __lockfunc functions */

-- 

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2009-08-17 15:46 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-14 12:58 [patch 0/3] Allow inlined spinlocks again V4 Heiko Carstens
2009-08-14 12:58 ` [patch 1/3] spinlock: move spinlock function bodies to header file Heiko Carstens
2009-08-14 12:58 ` [patch 2/3] spinlock: allow inlined spinlocks Heiko Carstens
2009-08-16 17:57   ` Heiko Carstens
2009-08-16 18:06     ` Ingo Molnar
2009-08-16 18:43       ` Linus Torvalds
2009-08-16 20:24         ` Ingo Molnar
2009-08-16 21:07           ` Linus Torvalds
2009-08-16 21:18             ` Ingo Molnar
2009-08-16 18:44       ` Heiko Carstens
2009-08-16 20:48         ` Ingo Molnar
2009-08-16 21:33           ` Heiko Carstens
2009-08-16 21:36             ` Ingo Molnar
2009-08-16 18:22     ` Linus Torvalds
2009-08-17 15:46       ` Heiko Carstens
2009-08-14 12:58 ` [patch 3/3] spinlock: inline code for all locking variants on s390 Heiko Carstens
  -- strict thread matches above, loose matches on Subject: below --
2009-08-12 18:39 [patch 0/3] Allow inlined spinlocks again V3 Heiko Carstens
2009-08-12 18:39 ` [patch 2/3] spinlock: allow inlined spinlocks Heiko Carstens
