From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>, Peter Anvin <hpa@zytor.com>,
	Mike Galbraith <bitbucket@online.de>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arjan van de Ven <arjan@linux.intel.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions
Date: Tue, 17 Sep 2013 11:10:47 +0200	[thread overview]
Message-ID: <20130917091143.387880231@infradead.org> (raw)
In-Reply-To: <20130917082838.218329307@infradead.org>

[-- Attachment #1: peterz-rmwcc.patch --]
[-- Type: text/plain, Size: 8130 bytes --]

Linus suggested using asm goto to get rid of the typical SETcc + TEST
instruction pair -- which also clobbers an extra register -- for our
modify_and_test() functions.

Because asm goto doesn't allow output operands, it has to include an
unconditional "memory" clobber when it changes a memory variable, in
order to force a reload.

Luckily all atomic ops already imply a compiler barrier to go along
with their memory barrier semantics.
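
For illustration, a minimal standalone sketch (not part of the patch) of
what the two strategies boil down to; the my_dec_and_test_*() helpers are
made-up examples, not kernel interfaces, and assume a compiler with asm
goto support (gcc 4.5+):

/* Without asm goto: the flag must be materialized via SETcc, costing an
 * extra register, and the caller's "if (...)" has to re-test it. */
static inline int my_dec_and_test_setcc(int *p)
{
	unsigned char c;

	asm volatile("decl %0; sete %1"
		     : "+m" (*p), "=qm" (c)
		     : : "memory");
	return c != 0;
}

/* With asm goto: the conditional jump consumes the flags directly; since
 * asm goto allows no output operands, the "memory" clobber forces *p to
 * be reloaded afterwards. */
static inline int my_dec_and_test_goto(int *p)
{
	asm volatile goto("decl %0; je %l[is_zero]"
			  : : "m" (*p)
			  : "memory" : is_zero);
	return 0;
is_zero:
	return 1;
}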

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/atomic.h      |   29 ++++----------------------
 arch/x86/include/asm/atomic64_64.h |   28 +++----------------------
 arch/x86/include/asm/bitops.h      |   24 +++------------------
 arch/x86/include/asm/local.h       |   28 +++----------------------
 arch/x86/include/asm/rmwcc.h       |   41 +++++++++++++++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 92 deletions(-)

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
+#include <asm/rmwcc.h>
 
 /*
  * Atomic operations that C can't guarantee us.  Useful for
@@ -76,12 +77,7 @@ static inline void atomic_sub(int i, ato
  */
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e");
 }
 
 /**
@@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *
  */
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
 
 /**
@@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(at
  */
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
 }
 
 /**
@@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(at
  */
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s");
 }
 
 /**
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -72,12 +72,7 @@ static inline void atomic64_sub(long i,
  */
 static inline int atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e");
 }
 
 /**
@@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64
  */
 static inline int atomic64_dec_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
 }
 
 /**
@@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(
  */
 static inline int atomic64_inc_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
 }
 
 /**
@@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(
  */
 static inline int atomic64_add_negative(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s");
 }
 
 /**
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -14,6 +14,7 @@
 
 #include <linux/compiler.h>
 #include <asm/alternative.h>
+#include <asm/rmwcc.h>
 
 #if BITS_PER_LONG == 32
 # define _BITOPS_LONG_SHIFT 5
@@ -204,12 +205,7 @@ static inline void change_bit(long nr, v
  */
 static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
-		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c");
 }
 
 /**
@@ -255,13 +251,7 @@ static inline int __test_and_set_bit(lon
  */
 static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c");
 }
 
 /**
@@ -314,13 +304,7 @@ static inline int __test_and_change_bit(
  */
 static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c");
 }
 
 static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -52,12 +52,7 @@ static inline void local_sub(long i, loc
  */
 static inline int local_sub_and_test(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_SUB "%2,%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e");
 }
 
 /**
@@ -70,12 +65,7 @@ static inline int local_sub_and_test(lon
  */
 static inline int local_dec_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_DEC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -88,12 +78,7 @@ static inline int local_dec_and_test(loc
  */
 static inline int local_inc_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_INC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -107,12 +92,7 @@ static inline int local_inc_and_test(loc
  */
 static inline int local_add_negative(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_ADD "%2,%0; sets %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s");
 }
 
 /**
--- /dev/null
+++ b/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_X86_RMWcc
+#define _ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	asm volatile goto (fullop "; j" cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc)			\
+	__GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	char c;								\
+	asm volatile (fullop "; set" cc " %1"				\
+			: "+m" (var), "=qm" (c)				\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc)			\
+	__GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _ASM_X86_RMWcc */
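
For readers tracing the macro plumbing: with CC_HAVE_ASM_GOTO set, the
GEN_UNARY_RMWcc() use in atomic_dec_and_test() above expands to roughly
the following (a hand-expanded sketch; the LOCK_PREFIX alternative is
written as a plain "lock; " here for readability):

static inline int atomic_dec_and_test(atomic_t *v)
{
	do {
		asm volatile goto("lock; decl %0; je %l[cc_label]"
				: : "m" (v->counter)
				: "memory" : cc_label);
		return 0;
cc_label:
		return 1;
	} while (0);
}

A caller such as "if (atomic_dec_and_test(&obj->refcnt)) ..." then lets gcc
wire its own branch straight to the asm goto label, so the boolean result
never needs to exist as a value at all.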



Thread overview: 80+ messages
2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
2013-09-17  9:10 ` Peter Zijlstra [this message]
2013-09-18 18:44   ` [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions Linus Torvalds
     [not found]     ` <4ec87843-c29a-401a-a54f-2cd4d61fba62@email.android.com>
2013-09-19  8:31       ` Andi Kleen
2013-09-19  9:39         ` Ingo Molnar
2013-09-20  4:43         ` H. Peter Anvin
2013-09-17  9:10 ` [PATCH 02/11] sched, rcu: Make RCU use resched_cpu() Peter Zijlstra
2013-09-17 14:40   ` Peter Zijlstra
2013-09-23 16:55     ` Paul E. McKenney
2013-09-23 21:18       ` Paul E. McKenney
2013-09-24  8:07         ` Peter Zijlstra
2013-09-24 13:37           ` Paul E. McKenney
2013-09-17  9:10 ` [PATCH 03/11] sched: Remove {set,clear}_need_resched Peter Zijlstra
2013-09-17  9:10 ` [PATCH 04/11] sched, idle: Fix the idle polling state logic Peter Zijlstra
2013-09-17  9:10 ` [PATCH 05/11] sched: Introduce preempt_count accessor functions Peter Zijlstra
2013-09-17  9:10 ` [PATCH 06/11] sched: Add NEED_RESCHED to the preempt_count Peter Zijlstra
2013-09-17  9:10 ` [PATCH 07/11] sched, arch: Create asm/preempt.h Peter Zijlstra
2013-09-17  9:10 ` [PATCH 08/11] sched: Create more preempt_count accessors Peter Zijlstra
2013-09-17  9:10 ` [PATCH 09/11] sched: Extract the basic add/sub preempt_count modifiers Peter Zijlstra
2013-09-17  9:10 ` [PATCH 10/11] sched, x86: Provide a per-cpu preempt_count implementation Peter Zijlstra
2013-09-17  9:10 ` [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call Peter Zijlstra
2013-09-17 20:23   ` Peter Zijlstra
2013-09-17 10:53 ` [PATCH 00/11] preempt_count rework -v3 Ingo Molnar
2013-09-17 11:22   ` Peter Zijlstra
2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
2013-09-17 20:00     ` Geert Uytterhoeven
2013-09-17 21:24       ` Thomas Gleixner
2013-09-18 14:06         ` Thomas Gleixner
2013-09-19 15:14           ` Thomas Gleixner
2013-09-19 17:02             ` Andreas Schwab
2013-09-19 18:19               ` Geert Uytterhoeven
2013-09-20  9:26                 ` Thomas Gleixner
2013-11-04 12:06                 ` Thomas Gleixner
2013-11-04 19:44                   ` Geert Uytterhoeven
2013-11-04 19:44                     ` Geert Uytterhoeven
2013-11-06 17:23                     ` Thomas Gleixner
2013-11-07 14:12                       ` Geert Uytterhoeven
2013-11-07 16:39                         ` Thomas Gleixner
2013-11-10  8:49                           ` Michael Schmitz
2013-11-10  9:12                             ` Geert Uytterhoeven
2013-11-11 14:11                               ` Thomas Gleixner
2013-11-11 19:34                                 ` Thomas Gleixner
2013-11-11 20:52                                   ` Thomas Gleixner
2013-11-12  6:56                                     ` Michael Schmitz
2013-11-12  6:56                                       ` Michael Schmitz
2013-11-12  8:44                                       ` schmitz
2013-11-12  8:44                                         ` schmitz
2013-11-12 15:08                                     ` Geert Uytterhoeven
2013-11-13 19:42                                     ` [tip:irq/urgent] m68k: Simplify low level interrupt handling code tip-bot for Thomas Gleixner
2013-11-12 14:09                                   ` [patch 1/6] hardirq: Make hardirq bits generic Geert Uytterhoeven
2013-11-11 19:42                                 ` Andreas Schwab
2013-11-12  9:18                                   ` Thomas Gleixner
2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 2/6] h8300: Use schedule_preempt_irq Thomas Gleixner
2013-09-20 17:41     ` Guenter Roeck
2013-09-20 21:46       ` Thomas Gleixner
2013-09-17 18:53   ` [patch 3/6] m32r: Use preempt_schedule_irq Thomas Gleixner
2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 5/6] sparc: " Thomas Gleixner
2013-09-17 22:54     ` David Miller
2013-09-17 23:23       ` Thomas Gleixner
2013-09-18  0:12         ` David Miller
2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 4/6] ia64: " Thomas Gleixner
2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-11-20 19:59     ` [patch 4/6] " Tony Luck
2013-11-20 20:57       ` Thomas Gleixner
2013-11-21 11:41         ` Thomas Gleixner
2013-11-21 12:39           ` Frederic Weisbecker
2013-11-21 13:06           ` Peter Zijlstra
2013-11-21 13:30             ` Thomas Gleixner
2013-11-21 18:57               ` Tony Luck
2013-11-26 18:37                 ` Tony Luck
2013-11-26 18:58                   ` Peter Zijlstra
2013-11-27 13:36                     ` Ingo Molnar
2013-11-27 14:07           ` [tip:sched/urgent] sched: Expose preempt_schedule_irq() tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 6/6] preempt: Make PREEMPT_ACTIVE generic Thomas Gleixner
2013-09-18 10:48     ` Peter Zijlstra
2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
