All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch
@ 2010-06-29 14:33 Jan Beulich
  2010-06-30  1:13 ` H. Peter Anvin
  0 siblings, 1 reply; 4+ messages in thread
From: Jan Beulich @ 2010-06-29 14:33 UTC (permalink / raw)
  To: mingo, tglx, hpa; +Cc: jeremy.fitzhardinge, Ky Srinivasan, linux-kernel

Under the assumption that the nop-s added by the base ticket spinlock
enlightenment patch might be considered undesirable (or worse), here
is an optional patch to eliminate these nop-s again. This is done
through extending the memory operands of the inc instructions used for
unlocking ticket locks to the necessary size, using assembler and
linker features.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: KY Srinivasan <ksrinivasan@novell.com>

---
 arch/x86/Makefile                      |    3 +
 arch/x86/include/asm/alternative-asm.h |   59 +++++++++++++++++++++++++++++++++
 arch/x86/include/asm/alternative.h     |    5 ++
 arch/x86/include/asm/spinlock.h        |    7 +--
 arch/x86/kernel/symdefs.lds            |    1 
 arch/x86/kernel/vmlinux.lds.S          |    2 +
 6 files changed, 72 insertions(+), 5 deletions(-)

--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/Makefile
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/Makefile
@@ -87,6 +87,9 @@ ifeq ($(CONFIG_KMEMCHECK),y)
 	KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
 endif
 
+KBUILD_CFLAGS += -Wa,-I$(srctree)/arch/x86/include
+LDFLAGS_MODULE += -T $(srctree)/arch/x86/kernel/symdefs.lds
+
 # Stackpointer is addressed different for 32 bit and 64 bit x86
 sp-$(CONFIG_X86_32) := esp
 sp-$(CONFIG_X86_64) := rsp
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/alternative.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/alternative.h
@@ -6,6 +6,11 @@
 #include <linux/stringify.h>
 #include <asm/asm.h>
 
+#if !defined(__ASSEMBLY__) && !defined(__PIC__)
+#include <asm/alternative-asm.h> /* just for tracking the build dependency */
+__asm__(".include \"asm/alternative-asm.h\"");
+#endif
+
 /*
  * Alternative inline assembly for SMP.
  *
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/alternative-asm.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/alternative-asm.h
@@ -1,3 +1,7 @@
+#if 0 /* Hide this from compiler. */
+	.if 0 # Hide assembly source stuff when assembling compiler output.
+#endif
+
 #ifdef __ASSEMBLY__
 
 #include <asm/asm.h>
@@ -16,3 +20,58 @@
 #endif
 
 #endif  /*  __ASSEMBLY__  */
+
+#if 0 /* Hide this from compiler. */
+	.else # Code to be used in compiler output:
+
+	.weak _$.zero
+
+	.macro unary opc arg1 arg2 arg3
+	 .Lempty=2
+	 .irpc c,"\arg2"
+	  .Lempty=3
+	 .endr
+	 .irpc c,"\arg3"
+	  .Lempty=0
+	 .endr
+	 .Lsym=1
+	 .Lnum=0
+	 .irpc c,"\arg1"
+	  .irpc m,"(123456789-0"
+	   .ifeqs "\c","\m"
+	    .Lsym=0
+	    .exitm
+	   .endif
+	   .Lnum=1
+	  .endr
+	  .exitm
+	 .endr
+	 .if .Lempty == 2
+	  .if .Lsym
+	   \opc \arg1
+	  .elseif .Lnum
+	   \opc _$.zero+\arg1
+	  .else
+	   \opc _$.zero\arg1
+	  .endif
+	 .elseif .Lempty == 3
+	  .if .Lsym
+	   \opc \arg1,\arg2
+	  .elseif .Lnum
+	   \opc _$.zero+\arg1,\arg2
+	  .else
+	   \opc _$.zero\arg1,\arg2
+	  .endif
+	 .else
+	  .if .Lsym
+	   \opc \arg1,\arg2,\arg3
+	  .elseif .Lnum
+	   \opc _$.zero+\arg1,\arg2,\arg3
+	  .else
+	   \opc _$.zero\arg1,\arg2,\arg3
+	  .endif
+	 .endif
+	.endm
+
+	.endif
+#endif
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
@@ -10,7 +10,6 @@
 
 #ifdef CONFIG_ENLIGHTEN_SPINLOCKS
 #include <asm/alternative.h>
-#include <asm/nops.h>
 /* Including asm/smp.h here causes a cyclic include dependency. */
 #include <asm/percpu.h>
 DECLARE_PER_CPU(int, cpu_number);
@@ -156,8 +155,7 @@ static __always_inline void __ticket_spi
 #else
 	unsigned int token;
 
-	alternative_io(UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
-		ASM_NOP3,
+	alternative_io(UNLOCK_LOCK_PREFIX "unary incb %[lock]\n\t",
 		ALTERNATIVE_TICKET_UNLOCK_HEAD
 		UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
 		"movzwl %[lock], %[token]\n\t"
@@ -228,8 +226,7 @@ static __always_inline void __ticket_spi
 #else
 	unsigned int token, tmp;
 
-	alternative_io(UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
-		ASM_NOP2,
+	alternative_io(UNLOCK_LOCK_PREFIX "unary incw %[lock]\n\t",
 		ALTERNATIVE_TICKET_UNLOCK_HEAD
 		UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
 		"movl %[lock], %[token]\n\t"
--- /dev/null
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/symdefs.lds
@@ -0,0 +1 @@
+_$.zero = 0;
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/vmlinux.lds.S
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/vmlinux.lds.S
@@ -27,6 +27,8 @@
 #include <asm/cache.h>
 #include <asm/boot.h>
 
+#include "symdefs.lds"
+
 #undef i386     /* in case the preprocessor is a 32bit one */
 
 OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch
  2010-06-29 14:33 [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch Jan Beulich
@ 2010-06-30  1:13 ` H. Peter Anvin
  2010-06-30  7:07   ` Jan Beulich
  0 siblings, 1 reply; 4+ messages in thread
From: H. Peter Anvin @ 2010-06-30  1:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: mingo, tglx, jeremy.fitzhardinge, Ky Srinivasan, linux-kernel

On 06/29/2010 07:33 AM, Jan Beulich wrote:
> Under the assumption that the nop-s added by the base ticket spinlock
> enlightenment patch might be considered undesirable (or worse), here
> is an optional patch to eliminate these nop-s again. This is done
> through extending the memory operands of the inc instructions used for
> unlocking ticket locks to the necessary size, using assembler and
> linker features.
> 
> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
> @@ -10,7 +10,6 @@
>  
>  #ifdef CONFIG_ENLIGHTEN_SPINLOCKS
>  #include <asm/alternative.h>
> -#include <asm/nops.h>
>  /* Including asm/smp.h here causes a cyclic include dependency. */
>  #include <asm/percpu.h>
>  DECLARE_PER_CPU(int, cpu_number);
> @@ -156,8 +155,7 @@ static __always_inline void __ticket_spi
>  #else
>  	unsigned int token;
>  
> -	alternative_io(UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
> -		ASM_NOP3,
> +	alternative_io(UNLOCK_LOCK_PREFIX "unary incb %[lock]\n\t",
>  		ALTERNATIVE_TICKET_UNLOCK_HEAD
>  		UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
>  		"movzwl %[lock], %[token]\n\t"
> @@ -228,8 +226,7 @@ static __always_inline void __ticket_spi
>  #else
>  	unsigned int token, tmp;
>  
> -	alternative_io(UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
> -		ASM_NOP2,
> +	alternative_io(UNLOCK_LOCK_PREFIX "unary incw %[lock]\n\t",
>  		ALTERNATIVE_TICKET_UNLOCK_HEAD
>  		UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
>  		"movl %[lock], %[token]\n\t"

If you're stretching (bloating) them anyway, perhaps we should be using
"add" instructions instead, with their better EFLAGS behavior?

	-hpa

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch
  2010-06-30  1:13 ` H. Peter Anvin
@ 2010-06-30  7:07   ` Jan Beulich
  2010-06-30 17:13     ` H. Peter Anvin
  0 siblings, 1 reply; 4+ messages in thread
From: Jan Beulich @ 2010-06-30  7:07 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: jeremy.fitzhardinge, mingo, tglx, Ky Srinivasan, linux-kernel

>>> On 30.06.10 at 03:13, "H. Peter Anvin" <hpa@zytor.com> wrote:
> On 06/29/2010 07:33 AM, Jan Beulich wrote:
>> Under the assumption that the nop-s added by the base ticket spinlock
>> enlightenment patch might be considered undesirable (or worse), here
>> is an optional patch to eliminate these nop-s again. This is done
>> through extending the memory operands of the inc instructions used for
>> unlocking ticket locks to the necessary size, using assembler and
>> linker features.
>> 
>> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
>> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
>> @@ -10,7 +10,6 @@
>>  
>>  #ifdef CONFIG_ENLIGHTEN_SPINLOCKS
>>  #include <asm/alternative.h>
>> -#include <asm/nops.h>
>>  /* Including asm/smp.h here causes a cyclic include dependency. */
>>  #include <asm/percpu.h>
>>  DECLARE_PER_CPU(int, cpu_number);
>> @@ -156,8 +155,7 @@ static __always_inline void __ticket_spi
>>  #else
>>  	unsigned int token;
>>  
>> -	alternative_io(UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
>> -		ASM_NOP3,
>> +	alternative_io(UNLOCK_LOCK_PREFIX "unary incb %[lock]\n\t",
>>  		ALTERNATIVE_TICKET_UNLOCK_HEAD
>>  		UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
>>  		"movzwl %[lock], %[token]\n\t"
>> @@ -228,8 +226,7 @@ static __always_inline void __ticket_spi
>>  #else
>>  	unsigned int token, tmp;
>>  
>> -	alternative_io(UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
>> -		ASM_NOP2,
>> +	alternative_io(UNLOCK_LOCK_PREFIX "unary incw %[lock]\n\t",
>>  		ALTERNATIVE_TICKET_UNLOCK_HEAD
>>  		UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
>>  		"movl %[lock], %[token]\n\t"
> 
> If you're stretching (bloating) them anyway, perhaps we should be using
> "add" instructions instead, with their better EFLAGS behavior?

Hmm, yes, that possibility I didn't even consider. Would have
the potential to get away without that admittedly ugly "unary"
assembler macro altogether, though at the price of growing all
instructions rather than just those that have a non-symbolic
and small displacement. Since unlock generally gets inlined, I'm
not certain this additional growth in code size would be
acceptable...

Please let me know, though before submitting an eventual third
version I'd especially appreciate knowing whether the first two
patches need further changes in order to get accepted.

Thanks, Jan


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch
  2010-06-30  7:07   ` Jan Beulich
@ 2010-06-30 17:13     ` H. Peter Anvin
  0 siblings, 0 replies; 4+ messages in thread
From: H. Peter Anvin @ 2010-06-30 17:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: jeremy.fitzhardinge, mingo, tglx, Ky Srinivasan, linux-kernel

On 06/30/2010 12:07 AM, Jan Beulich wrote:
>>
>> If you're stretching (bloating) them anyway, perhaps we should be using
>> "add" instructions instead, with their better EFLAGS behavior?
> 
> Hmm, yes, that possibility I didn't even consider. Would have
> the potential to get away without that admittedly ugly "unary"
> assembler macro altogether, though at the price of growing all
> instructions rather than just those that have a non-symbolic
> and small displacement. Since unlock generally gets inlined, I'm
> not certain this additional growth in code size would be
> acceptable...
> 
> Please let me know, though before submitting an eventual third
> version I'd especially appreciate knowing whether the first two
> patches need further changes in order to get accepted.
> 

Will look at it today, hopefully.  The Syslinux 4.00 release has
unfortunately occupied me over the last week-plus.

As far as the "unary" macro is concerned... I have to admit I couldn't
even figure out what it was supposed to do.  It could definitely use a
better comment.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-06-30 17:13 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-29 14:33 [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch Jan Beulich
2010-06-30  1:13 ` H. Peter Anvin
2010-06-30  7:07   ` Jan Beulich
2010-06-30 17:13     ` H. Peter Anvin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.