All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
To: akpm@linux-foundation.org, Ingo Molnar <mingo@elte.hu>,
	linux-kernel@vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
	mark.langsdorf@amd.com, arekm@maven.pl,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andi Kleen <andi@firstfloor.org>, Avi Kivity <avi@qumranet.com>
Subject: [patch 2/2] x86 amd fix cmpxchg read acquire barrier
Date: Wed, 22 Apr 2009 16:18:54 -0400	[thread overview]
Message-ID: <20090422202453.829846363@polymtl.ca> (raw)
In-Reply-To: 20090422201852.092307236@polymtl.ca

[-- Attachment #1: x86-amd-fix-cmpxchg-read-acquire-barrier.patch --]
[-- Type: text/plain, Size: 8823 bytes --]

http://google-perftools.googlecode.com/svn-history/r48/trunk/src/base/atomicops-internals-x86.cc

says

"  // Opteron Rev E has a bug in which on very rare occasions a locked
  // instruction doesn't act as a read-acquire barrier if followed by a
  // non-locked read-modify-write instruction.  Rev F has this bug in 
  // pre-release versions, but not in versions released to customers,
  // so we test only for Rev E, which is family 15, model 32..63 inclusive.
  if (strcmp(vendor, "AuthenticAMD") == 0 &&       // AMD
      family == 15 &&
      32 <= model && model <= 63) {
    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
  } else {
    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
  }
"

There is no official AMD bug ID yet, but it seems to be reported in the field
and a fix is present in Solaris code base. The following links shows the current
understanding of the issue.

http://bugzilla.kernel.org/show_bug.cgi?id=11305
http://bugs.mysql.com/bug.php?id=26081

Since I needed to put the alternative within assembly statements, I decided to
rework alternative.h and fold the multiple alternative*() definitions into the
same ALTERNATIVE() macro. It is done in the previous patch. Those were
duplicated code anyway.

AFAIK, this bug has not received any official bug ID from AMD. Solaris already
have a workaround for this issue.

One should probably audit Xen cmpxchg too.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: mark.langsdorf@amd.com
CC: arekm@maven.pl
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Andi Kleen <andi@firstfloor.org>
CC: Avi Kivity <avi@qumranet.com>
---
 arch/x86/include/asm/alternative.h |    5 +++++
 arch/x86/include/asm/cmpxchg_64.h  |   21 ++++++++++++++-------
 arch/x86/include/asm/cpufeature.h  |    1 +
 arch/x86/include/asm/futex.h       |    2 ++
 arch/x86/include/asm/rwsem.h       |    1 +
 arch/x86/include/asm/spinlock.h    |    2 ++
 arch/x86/kernel/cpu/amd.c          |    4 ++++
 7 files changed, 29 insertions(+), 7 deletions(-)

Index: linux-2.6-lttng/arch/x86/include/asm/cpufeature.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/cpufeature.h	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/cpufeature.h	2009-04-18 23:58:28.000000000 -0400
@@ -94,6 +94,7 @@
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_NEED_CMPXCHG_LFENCE	(3*32+26) /* Fix AMD cmpxchg lfence */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
Index: linux-2.6-lttng/arch/x86/kernel/cpu/amd.c
===================================================================
--- linux-2.6-lttng.orig/arch/x86/kernel/cpu/amd.c	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/kernel/cpu/amd.c	2009-04-18 23:58:28.000000000 -0400
@@ -416,6 +416,10 @@ static void __cpuinit init_amd(struct cp
 		clear_cpu_cap(c, X86_FEATURE_MCE);
 #endif
 
+	/* Enable workaround for missing AMD cmpxchg lfence */
+	if (c->x86 == 0xf && c->x86_model >= 32 && c->x86_model <= 63)
+		set_cpu_cap(c, X86_FEATURE_NEED_CMPXCHG_LFENCE);
+
 	/* Enable workaround for FXSAVE leak */
 	if (c->x86 >= 6)
 		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
Index: linux-2.6-lttng/arch/x86/include/asm/cmpxchg_64.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/cmpxchg_64.h	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/cmpxchg_64.h	2009-04-18 23:58:28.000000000 -0400
@@ -66,25 +66,29 @@ static inline unsigned long __cmpxchg(vo
 	unsigned long prev;
 	switch (size) {
 	case 1:
-		asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+		asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
 		return prev;
 	case 2:
-		asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+		asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
 		return prev;
 	case 4:
-		asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
+		asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
 		return prev;
 	case 8:
-		asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+		asm volatile(LOCK_PREFIX "cmpxchgq %1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
@@ -105,19 +109,22 @@ static inline unsigned long __sync_cmpxc
 	unsigned long prev;
 	switch (size) {
 	case 1:
-		asm volatile("lock; cmpxchgb %b1,%2"
+		asm volatile("lock; cmpxchgb %b1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
 		return prev;
 	case 2:
-		asm volatile("lock; cmpxchgw %w1,%2"
+		asm volatile("lock; cmpxchgw %w1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
 		return prev;
 	case 4:
-		asm volatile("lock; cmpxchgl %1,%2"
+		asm volatile("lock; cmpxchgl %1,%2\n\t"
+			     CMPXCHG_LFENCE
 			     : "=a"(prev)
 			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
 			     : "memory");
Index: linux-2.6-lttng/arch/x86/include/asm/alternative.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/alternative.h	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/alternative.h	2009-04-18 23:58:28.000000000 -0400
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
+#include <asm/nops.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -54,6 +55,10 @@ struct alt_instr {
 #endif
 };
 
+/* Needed both in UP and SMP build because of cmpxchg_sync */
+#define CMPXCHG_LFENCE 	ALTERNATIVE(ASM_NOP3, "lfence",	\
+				    X86_FEATURE_NEED_CMPXCHG_LFENCE)
+
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 
Index: linux-2.6-lttng/arch/x86/include/asm/spinlock.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/spinlock.h	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/spinlock.h	2009-04-18 23:58:28.000000000 -0400
@@ -86,6 +86,7 @@ static __always_inline int __ticket_spin
 		     "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
 		     "jne 1f\n\t"
 		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
+		     CMPXCHG_LFENCE "\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
@@ -139,6 +140,7 @@ static __always_inline int __ticket_spin
 		     "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
 		     "jne 1f\n\t"
 		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
+		     CMPXCHG_LFENCE "\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
Index: linux-2.6-lttng/arch/x86/include/asm/rwsem.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/rwsem.h	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/rwsem.h	2009-04-18 23:58:28.000000000 -0400
@@ -129,6 +129,7 @@ static inline int __down_read_trylock(st
 		     "  addl      %3,%2\n\t"
 		     "  jle	     2f\n\t"
 		     LOCK_PREFIX "  cmpxchgl  %2,%0\n\t"
+		     CMPXCHG_LFENCE "\n\t"
 		     "  jnz	     1b\n\t"
 		     "2:\n\t"
 		     "# ending __down_read_trylock\n\t"
Index: linux-2.6-lttng/arch/x86/include/asm/futex.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/futex.h	2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/futex.h	2009-04-18 23:58:28.000000000 -0400
@@ -26,6 +26,7 @@
 		     "\tmovl\t%0, %3\n"				\
 		     "\t" insn "\n"				\
 		     "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"	\
+		     CMPXCHG_LFENCE "\n"			\
 		     "\tjnz\t1b\n"				\
 		     "3:\t.section .fixup,\"ax\"\n"		\
 		     "4:\tmov\t%5, %1\n"			\
@@ -123,6 +124,7 @@ static inline int futex_atomic_cmpxchg_i
 		return -EFAULT;
 
 	asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
+		     CMPXCHG_LFENCE "\n"
 		     "2:\t.section .fixup, \"ax\"\n"
 		     "3:\tmov     %2, %0\n"
 		     "\tjmp     2b\n"

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

  parent reply	other threads:[~2009-04-22 20:48 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-22 20:18 [patch 0/2] Fixing AMD cmpxchg "missing lfence" mess Mathieu Desnoyers
2009-04-22 20:18 ` [patch 1/2] x86: cleanup alternative.h Mathieu Desnoyers
2009-04-28 13:05   ` [GIT PULL] " Mathieu Desnoyers
2009-04-28 14:58     ` Ingo Molnar
2009-04-28 15:13       ` [GIT PULL] x86: cleanup alternative.h (v2) Mathieu Desnoyers
2009-04-29  6:17         ` [tip:x86/asm] x86: clean up alternative.h tip-bot for Mathieu Desnoyers
2009-04-29  6:22           ` Mathieu Desnoyers
2009-04-28 17:40       ` [GIT PULL] x86: cleanup alternative.h H. Peter Anvin
2009-04-28 18:11         ` Mathieu Desnoyers
2009-04-28 18:13         ` [GIT PULL] x86: cleanup alternative.h (v3) Mathieu Desnoyers
2009-04-22 20:18 ` Mathieu Desnoyers [this message]
2009-04-22 20:59   ` [patch 2/2] x86 amd fix cmpxchg read acquire barrier Alan Cox
2009-04-22 22:47     ` Mathieu Desnoyers
2009-04-22 22:53       ` Alan Cox
2009-04-22 23:26         ` Mathieu Desnoyers
2009-04-23  8:06   ` Ingo Molnar
2009-04-23 13:19     ` Mathieu Desnoyers
2009-04-23 13:41       ` Arkadiusz Miskiewicz
2009-04-23 22:17         ` Mathieu Desnoyers
2009-04-25  8:19   ` Pavel Machek
2009-05-02 15:55     ` Mathieu Desnoyers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090422202453.829846363@polymtl.ca \
    --to=mathieu.desnoyers@polymtl.ca \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=arekm@maven.pl \
    --cc=avi@qumranet.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.langsdorf@amd.com \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.