All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Garnier <thgarnie@google.com>
To: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	"David S . Miller" <davem@davemloft.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, "H . Peter Anvin" <hpa@zytor.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Arnd Bergmann <arnd@arndb.de>, Kees Cook <keescook@chromium.org>,
	Andrey Ryabinin <aryabinin@virtuozzo.com>,
	Matthias Kaehlcke <mka@chromium.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Andy Lutomirski <luto@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Borislav Petkov <bp@suse.de>,
	"Rafael J . Wysocki" <rjw@rjwysocki.net>,
	Len Brown <len.brown@intel.com>, Pavel Machek <pavel@ucw.cz>,
	Juergen Gross <jgross@suse.com>,
	Chris Wright <chrisw@sous-sol.org>,
	Alok Kataria <akataria@vmware.com>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Tejun Heo <tj@kernel.org>
Subject: Re: [PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support
Date: Fri, 20 Oct 2017 07:48:57 -0700	[thread overview]
Message-ID: <CAJcbSZEhkdy9S3qtN4jFaj2doQsp1ORyHiNmzncDb2+bRatXmw@mail.gmail.com> (raw)
In-Reply-To: <CAKv+Gu9XMnNA0UoGfFMQmC9=Ryh6dcOduxH+tq49bcdvBwhyQw@mail.gmail.com>

On Fri, Oct 20, 2017 at 1:28 AM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 20 October 2017 at 09:24, Ingo Molnar <mingo@kernel.org> wrote:
>>
>> * Thomas Garnier <thgarnie@google.com> wrote:
>>
>>> Change the assembly code to use only relative references of symbols for the
>>> kernel to be PIE compatible.
>>>
>>> Position Independent Executable (PIE) support will allow extending the
>>> KASLR randomization range below the -2G memory limit.
>>
>>> diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
>>> index 8739cf7795de..86fa068e5e81 100644
>>> --- a/arch/x86/crypto/aes-x86_64-asm_64.S
>>> +++ b/arch/x86/crypto/aes-x86_64-asm_64.S
>>> @@ -48,8 +48,12 @@
>>>  #define R10  %r10
>>>  #define R11  %r11
>>>
>>> +/* Hold global for PIE suport */
>>> +#define RBASE        %r12
>>> +
>>>  #define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
>>>       ENTRY(FUNC);                    \
>>> +     pushq   RBASE;                  \
>>>       movq    r1,r2;                  \
>>>       leaq    KEY+48(r8),r9;          \
>>>       movq    r10,r11;                \
>>> @@ -74,54 +78,63 @@
>>>       movl    r6 ## E,4(r9);          \
>>>       movl    r7 ## E,8(r9);          \
>>>       movl    r8 ## E,12(r9);         \
>>> +     popq    RBASE;                  \
>>>       ret;                            \
>>>       ENDPROC(FUNC);
>>>
>>> +#define round_mov(tab_off, reg_i, reg_o) \
>>> +     leaq    tab_off(%rip), RBASE; \
>>> +     movl    (RBASE,reg_i,4), reg_o;
>>> +
>>> +#define round_xor(tab_off, reg_i, reg_o) \
>>> +     leaq    tab_off(%rip), RBASE; \
>>> +     xorl    (RBASE,reg_i,4), reg_o;
>>> +
>>>  #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
>>>       movzbl  r2 ## H,r5 ## E;        \
>>>       movzbl  r2 ## L,r6 ## E;        \
>>> -     movl    TAB+1024(,r5,4),r5 ## E;\
>>> +     round_mov(TAB+1024, r5, r5 ## E)\
>>>       movw    r4 ## X,r2 ## X;        \
>>> -     movl    TAB(,r6,4),r6 ## E;     \
>>> +     round_mov(TAB, r6, r6 ## E)     \
>>>       roll    $16,r2 ## E;            \
>>>       shrl    $16,r4 ## E;            \
>>>       movzbl  r4 ## L,r7 ## E;        \
>>>       movzbl  r4 ## H,r4 ## E;        \
>>>       xorl    OFFSET(r8),ra ## E;     \
>>>       xorl    OFFSET+4(r8),rb ## E;   \
>>> -     xorl    TAB+3072(,r4,4),r5 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r6 ## E;\
>>> +     round_xor(TAB+3072, r4, r5 ## E)\
>>> +     round_xor(TAB+2048, r7, r6 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r4 ## E;        \
>>> -     movl    TAB+1024(,r4,4),r4 ## E;\
>>> +     round_mov(TAB+1024, r4, r4 ## E)\
>>>       movw    r3 ## X,r1 ## X;        \
>>>       roll    $16,r1 ## E;            \
>>>       shrl    $16,r3 ## E;            \
>>> -     xorl    TAB(,r7,4),r5 ## E;     \
>>> +     round_xor(TAB, r7, r5 ## E)     \
>>>       movzbl  r3 ## L,r7 ## E;        \
>>>       movzbl  r3 ## H,r3 ## E;        \
>>> -     xorl    TAB+3072(,r3,4),r4 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r5 ## E;\
>>> +     round_xor(TAB+3072, r3, r4 ## E)\
>>> +     round_xor(TAB+2048, r7, r5 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r3 ## E;        \
>>>       shrl    $16,r1 ## E;            \
>>> -     xorl    TAB+3072(,r3,4),r6 ## E;\
>>> -     movl    TAB+2048(,r7,4),r3 ## E;\
>>> +     round_xor(TAB+3072, r3, r6 ## E)\
>>> +     round_mov(TAB+2048, r7, r3 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r1 ## E;        \
>>> -     xorl    TAB+1024(,r1,4),r6 ## E;\
>>> -     xorl    TAB(,r7,4),r3 ## E;     \
>>> +     round_xor(TAB+1024, r1, r6 ## E)\
>>> +     round_xor(TAB, r7, r3 ## E)     \
>>>       movzbl  r2 ## H,r1 ## E;        \
>>>       movzbl  r2 ## L,r7 ## E;        \
>>>       shrl    $16,r2 ## E;            \
>>> -     xorl    TAB+3072(,r1,4),r3 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r4 ## E;\
>>> +     round_xor(TAB+3072, r1, r3 ## E)\
>>> +     round_xor(TAB+2048, r7, r4 ## E)\
>>>       movzbl  r2 ## H,r1 ## E;        \
>>>       movzbl  r2 ## L,r2 ## E;        \
>>>       xorl    OFFSET+8(r8),rc ## E;   \
>>>       xorl    OFFSET+12(r8),rd ## E;  \
>>> -     xorl    TAB+1024(,r1,4),r3 ## E;\
>>> -     xorl    TAB(,r2,4),r4 ## E;
>>> +     round_xor(TAB+1024, r1, r3 ## E)\
>>> +     round_xor(TAB, r2, r4 ## E)
>>
>> This appears to be adding unconditional overhead to a function that was moved to
>> assembly to improve its performance.
>>

It adds a couple of extra instructions; how much overhead that creates
is hard for me to tell. It would increase the code complexity if
everything were wrapped in ifdefs.

>
> I did some benchmarking on this code a while ago and, interestingly,
> it was slower than the generic C implementation (on a Pentium E2200),
> so we may want to consider whether we still need this driver in the
> first place.

Interesting.

-- 
Thomas

WARNING: multiple messages have this Message-ID (diff)
From: Thomas Garnier <thgarnie@google.com>
To: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	"David S . Miller" <davem@davemloft.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, "H . Peter Anvin" <hpa@zytor.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Arnd Bergmann <arnd@arndb.de>, Kees Cook <keescook@chromium.org>,
	Andrey Ryabinin <aryabinin@virtuozzo.com>,
	Matthias Kaehlcke <mka@chromium.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Andy Lutomirski <luto@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Borislav Petkov <bp@suse.de>,
	"Rafael J . Wysocki" <rjw@rjwysocki.net>,
	Len Brown <len.brown@intel.com>, Pavel Machek <pavel@ucw.cz>,
	Juergen Gross <jgross@suse.com>,
	Chris Wright <chrisw@sous-sol.org>,
	Alok Kataria <akataria@vmware.com>,
	Rusty Russell <rusty@rustcorp.com.au>, Tejun Heo <tj@kernel.org>
Subject: Re: [PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support
Date: Fri, 20 Oct 2017 07:48:57 -0700	[thread overview]
Message-ID: <CAJcbSZEhkdy9S3qtN4jFaj2doQsp1ORyHiNmzncDb2+bRatXmw@mail.gmail.com> (raw)
In-Reply-To: <CAKv+Gu9XMnNA0UoGfFMQmC9=Ryh6dcOduxH+tq49bcdvBwhyQw@mail.gmail.com>

On Fri, Oct 20, 2017 at 1:28 AM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 20 October 2017 at 09:24, Ingo Molnar <mingo@kernel.org> wrote:
>>
>> * Thomas Garnier <thgarnie@google.com> wrote:
>>
>>> Change the assembly code to use only relative references of symbols for the
>>> kernel to be PIE compatible.
>>>
>>> Position Independent Executable (PIE) support will allow extending the
>>> KASLR randomization range below the -2G memory limit.
>>
>>> diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
>>> index 8739cf7795de..86fa068e5e81 100644
>>> --- a/arch/x86/crypto/aes-x86_64-asm_64.S
>>> +++ b/arch/x86/crypto/aes-x86_64-asm_64.S
>>> @@ -48,8 +48,12 @@
>>>  #define R10  %r10
>>>  #define R11  %r11
>>>
>>> +/* Hold global for PIE suport */
>>> +#define RBASE        %r12
>>> +
>>>  #define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
>>>       ENTRY(FUNC);                    \
>>> +     pushq   RBASE;                  \
>>>       movq    r1,r2;                  \
>>>       leaq    KEY+48(r8),r9;          \
>>>       movq    r10,r11;                \
>>> @@ -74,54 +78,63 @@
>>>       movl    r6 ## E,4(r9);          \
>>>       movl    r7 ## E,8(r9);          \
>>>       movl    r8 ## E,12(r9);         \
>>> +     popq    RBASE;                  \
>>>       ret;                            \
>>>       ENDPROC(FUNC);
>>>
>>> +#define round_mov(tab_off, reg_i, reg_o) \
>>> +     leaq    tab_off(%rip), RBASE; \
>>> +     movl    (RBASE,reg_i,4), reg_o;
>>> +
>>> +#define round_xor(tab_off, reg_i, reg_o) \
>>> +     leaq    tab_off(%rip), RBASE; \
>>> +     xorl    (RBASE,reg_i,4), reg_o;
>>> +
>>>  #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
>>>       movzbl  r2 ## H,r5 ## E;        \
>>>       movzbl  r2 ## L,r6 ## E;        \
>>> -     movl    TAB+1024(,r5,4),r5 ## E;\
>>> +     round_mov(TAB+1024, r5, r5 ## E)\
>>>       movw    r4 ## X,r2 ## X;        \
>>> -     movl    TAB(,r6,4),r6 ## E;     \
>>> +     round_mov(TAB, r6, r6 ## E)     \
>>>       roll    $16,r2 ## E;            \
>>>       shrl    $16,r4 ## E;            \
>>>       movzbl  r4 ## L,r7 ## E;        \
>>>       movzbl  r4 ## H,r4 ## E;        \
>>>       xorl    OFFSET(r8),ra ## E;     \
>>>       xorl    OFFSET+4(r8),rb ## E;   \
>>> -     xorl    TAB+3072(,r4,4),r5 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r6 ## E;\
>>> +     round_xor(TAB+3072, r4, r5 ## E)\
>>> +     round_xor(TAB+2048, r7, r6 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r4 ## E;        \
>>> -     movl    TAB+1024(,r4,4),r4 ## E;\
>>> +     round_mov(TAB+1024, r4, r4 ## E)\
>>>       movw    r3 ## X,r1 ## X;        \
>>>       roll    $16,r1 ## E;            \
>>>       shrl    $16,r3 ## E;            \
>>> -     xorl    TAB(,r7,4),r5 ## E;     \
>>> +     round_xor(TAB, r7, r5 ## E)     \
>>>       movzbl  r3 ## L,r7 ## E;        \
>>>       movzbl  r3 ## H,r3 ## E;        \
>>> -     xorl    TAB+3072(,r3,4),r4 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r5 ## E;\
>>> +     round_xor(TAB+3072, r3, r4 ## E)\
>>> +     round_xor(TAB+2048, r7, r5 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r3 ## E;        \
>>>       shrl    $16,r1 ## E;            \
>>> -     xorl    TAB+3072(,r3,4),r6 ## E;\
>>> -     movl    TAB+2048(,r7,4),r3 ## E;\
>>> +     round_xor(TAB+3072, r3, r6 ## E)\
>>> +     round_mov(TAB+2048, r7, r3 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r1 ## E;        \
>>> -     xorl    TAB+1024(,r1,4),r6 ## E;\
>>> -     xorl    TAB(,r7,4),r3 ## E;     \
>>> +     round_xor(TAB+1024, r1, r6 ## E)\
>>> +     round_xor(TAB, r7, r3 ## E)     \
>>>       movzbl  r2 ## H,r1 ## E;        \
>>>       movzbl  r2 ## L,r7 ## E;        \
>>>       shrl    $16,r2 ## E;            \
>>> -     xorl    TAB+3072(,r1,4),r3 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r4 ## E;\
>>> +     round_xor(TAB+3072, r1, r3 ## E)\
>>> +     round_xor(TAB+2048, r7, r4 ## E)\
>>>       movzbl  r2 ## H,r1 ## E;        \
>>>       movzbl  r2 ## L,r2 ## E;        \
>>>       xorl    OFFSET+8(r8),rc ## E;   \
>>>       xorl    OFFSET+12(r8),rd ## E;  \
>>> -     xorl    TAB+1024(,r1,4),r3 ## E;\
>>> -     xorl    TAB(,r2,4),r4 ## E;
>>> +     round_xor(TAB+1024, r1, r3 ## E)\
>>> +     round_xor(TAB, r2, r4 ## E)
>>
>> This appears to be adding unconditional overhead to a function that was moved to
>> assembly to improve its performance.
>>

It adds a couple of extra instructions; how much overhead that creates
is hard for me to tell. It would increase the code complexity if
everything were wrapped in ifdefs.

>
> I did some benchmarking on this code a while ago and, interestingly,
> it was slower than the generic C implementation (on a Pentium E2200),
> so we may want to consider whether we still need this driver in the
> first place.

Interesting.

-- 
Thomas

WARNING: multiple messages have this Message-ID (diff)
From: Thomas Garnier <thgarnie@google.com>
To: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: "Ingo Molnar" <mingo@kernel.org>,
	"Herbert Xu" <herbert@gondor.apana.org.au>,
	"David S . Miller" <davem@davemloft.net>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Ingo Molnar" <mingo@redhat.com>,
	"H . Peter Anvin" <hpa@zytor.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Josh Poimboeuf" <jpoimboe@redhat.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Kees Cook" <keescook@chromium.org>,
	"Andrey Ryabinin" <aryabinin@virtuozzo.com>,
	"Matthias Kaehlcke" <mka@chromium.org>,
	"Tom Lendacky" <thomas.lendacky@amd.com>,
	"Andy Lutomirski" <luto@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	"Borislav Petkov" <bp@suse.de>,
	"Rafael J . Wysocki" <rjw@rjwysocki.net>,
	"Len Brown" <len.brown@intel.com>, "Pavel Machek" <pavel@ucw.cz>,
	"Juergen Gross" <jgross@suse.com>,
	"Chris Wright" <chrisw@sous-sol.org>,
	"Alok Kataria" <akataria@vmware.com>,
	"Rusty Russell" <rusty@rustcorp.com.au>,
	"Tejun Heo" <tj@kernel.org>, "Christoph Lameter" <cl@linux.com>,
	"Boris Ostrovsky" <boris.ostrovsky@oracle.com>,
	"Paul Gortmaker" <paul.gortmaker@windriver.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Alexey Dobriyan" <adobriyan@gmail.com>,
	"Paul E . McKenney" <paulmck@linux.vnet.ibm.com>,
	"Nicolas Pitre" <nicolas.pitre@linaro.org>,
	"Borislav Petkov" <bp@alien8.de>,
	"Luis R . Rodriguez" <mcgrof@kernel.org>,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	"Christopher Li" <sparse@chrisli.org>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Jason Baron" <jbaron@akamai.com>,
	"Mika Westerberg" <mika.westerberg@linux.intel.com>,
	"Dou Liyang" <douly.fnst@cn.fujitsu.com>,
	"Rafael J . Wysocki" <rafael.j.wysocki@intel.com>,
	"Lukas Wunner" <lukas@wunner.de>,
	"Masahiro Yamada" <yamada.masahiro@socionext.com>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Markus Trippelsdorf" <markus@trippelsdorf.de>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Joerg Roedel" <joro@8bytes.org>,
	"Rik van Riel" <riel@redhat.com>,
	"David Howells" <dhowells@redhat.com>,
	"Waiman Long" <longman@redhat.com>, "Kyle Huey" <me@kylehuey.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Michal Hocko" <mhocko@suse.com>,
	"Peter Foley" <pefoley2@pefoley.com>,
	"Paul Bolle" <pebolle@tiscali.nl>,
	"Jiri Kosina" <jkosina@suse.cz>,
	"H . J . Lu" <hjl.tools@gmail.com>,
	"Rob Landley" <rob@landley.net>, "Baoquan He" <bhe@redhat.com>,
	"Jan H . Schönherr" <jschoenh@amazon.de>,
	"Daniel Micay" <danielmicay@gmail.com>,
	"x86@kernel.org" <x86@kernel.org>,
	"linux-crypto@vger.kernel.org" <linux-crypto@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	linux-pm <linux-pm@vger.kernel.org>,
	virtualization@lists.linux-foundation.org,
	xen-devel <xen-devel@lists.xenproject.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
	Linux-Sparse <linux-sparse@vger.kernel.org>,
	"KVM devel mailing list" <kvm@vger.kernel.org>,
	"Linux Doc Mailing List" <linux-doc@vger.kernel.org>,
	"Kernel Hardening" <kernel-hardening@lists.openwall.com>
Subject: [kernel-hardening] Re: [PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support
Date: Fri, 20 Oct 2017 07:48:57 -0700	[thread overview]
Message-ID: <CAJcbSZEhkdy9S3qtN4jFaj2doQsp1ORyHiNmzncDb2+bRatXmw@mail.gmail.com> (raw)
In-Reply-To: <CAKv+Gu9XMnNA0UoGfFMQmC9=Ryh6dcOduxH+tq49bcdvBwhyQw@mail.gmail.com>

On Fri, Oct 20, 2017 at 1:28 AM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 20 October 2017 at 09:24, Ingo Molnar <mingo@kernel.org> wrote:
>>
>> * Thomas Garnier <thgarnie@google.com> wrote:
>>
>>> Change the assembly code to use only relative references of symbols for the
>>> kernel to be PIE compatible.
>>>
>>> Position Independent Executable (PIE) support will allow extending the
>>> KASLR randomization range below the -2G memory limit.
>>
>>> diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
>>> index 8739cf7795de..86fa068e5e81 100644
>>> --- a/arch/x86/crypto/aes-x86_64-asm_64.S
>>> +++ b/arch/x86/crypto/aes-x86_64-asm_64.S
>>> @@ -48,8 +48,12 @@
>>>  #define R10  %r10
>>>  #define R11  %r11
>>>
>>> +/* Hold global for PIE suport */
>>> +#define RBASE        %r12
>>> +
>>>  #define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
>>>       ENTRY(FUNC);                    \
>>> +     pushq   RBASE;                  \
>>>       movq    r1,r2;                  \
>>>       leaq    KEY+48(r8),r9;          \
>>>       movq    r10,r11;                \
>>> @@ -74,54 +78,63 @@
>>>       movl    r6 ## E,4(r9);          \
>>>       movl    r7 ## E,8(r9);          \
>>>       movl    r8 ## E,12(r9);         \
>>> +     popq    RBASE;                  \
>>>       ret;                            \
>>>       ENDPROC(FUNC);
>>>
>>> +#define round_mov(tab_off, reg_i, reg_o) \
>>> +     leaq    tab_off(%rip), RBASE; \
>>> +     movl    (RBASE,reg_i,4), reg_o;
>>> +
>>> +#define round_xor(tab_off, reg_i, reg_o) \
>>> +     leaq    tab_off(%rip), RBASE; \
>>> +     xorl    (RBASE,reg_i,4), reg_o;
>>> +
>>>  #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
>>>       movzbl  r2 ## H,r5 ## E;        \
>>>       movzbl  r2 ## L,r6 ## E;        \
>>> -     movl    TAB+1024(,r5,4),r5 ## E;\
>>> +     round_mov(TAB+1024, r5, r5 ## E)\
>>>       movw    r4 ## X,r2 ## X;        \
>>> -     movl    TAB(,r6,4),r6 ## E;     \
>>> +     round_mov(TAB, r6, r6 ## E)     \
>>>       roll    $16,r2 ## E;            \
>>>       shrl    $16,r4 ## E;            \
>>>       movzbl  r4 ## L,r7 ## E;        \
>>>       movzbl  r4 ## H,r4 ## E;        \
>>>       xorl    OFFSET(r8),ra ## E;     \
>>>       xorl    OFFSET+4(r8),rb ## E;   \
>>> -     xorl    TAB+3072(,r4,4),r5 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r6 ## E;\
>>> +     round_xor(TAB+3072, r4, r5 ## E)\
>>> +     round_xor(TAB+2048, r7, r6 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r4 ## E;        \
>>> -     movl    TAB+1024(,r4,4),r4 ## E;\
>>> +     round_mov(TAB+1024, r4, r4 ## E)\
>>>       movw    r3 ## X,r1 ## X;        \
>>>       roll    $16,r1 ## E;            \
>>>       shrl    $16,r3 ## E;            \
>>> -     xorl    TAB(,r7,4),r5 ## E;     \
>>> +     round_xor(TAB, r7, r5 ## E)     \
>>>       movzbl  r3 ## L,r7 ## E;        \
>>>       movzbl  r3 ## H,r3 ## E;        \
>>> -     xorl    TAB+3072(,r3,4),r4 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r5 ## E;\
>>> +     round_xor(TAB+3072, r3, r4 ## E)\
>>> +     round_xor(TAB+2048, r7, r5 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r3 ## E;        \
>>>       shrl    $16,r1 ## E;            \
>>> -     xorl    TAB+3072(,r3,4),r6 ## E;\
>>> -     movl    TAB+2048(,r7,4),r3 ## E;\
>>> +     round_xor(TAB+3072, r3, r6 ## E)\
>>> +     round_mov(TAB+2048, r7, r3 ## E)\
>>>       movzbl  r1 ## L,r7 ## E;        \
>>>       movzbl  r1 ## H,r1 ## E;        \
>>> -     xorl    TAB+1024(,r1,4),r6 ## E;\
>>> -     xorl    TAB(,r7,4),r3 ## E;     \
>>> +     round_xor(TAB+1024, r1, r6 ## E)\
>>> +     round_xor(TAB, r7, r3 ## E)     \
>>>       movzbl  r2 ## H,r1 ## E;        \
>>>       movzbl  r2 ## L,r7 ## E;        \
>>>       shrl    $16,r2 ## E;            \
>>> -     xorl    TAB+3072(,r1,4),r3 ## E;\
>>> -     xorl    TAB+2048(,r7,4),r4 ## E;\
>>> +     round_xor(TAB+3072, r1, r3 ## E)\
>>> +     round_xor(TAB+2048, r7, r4 ## E)\
>>>       movzbl  r2 ## H,r1 ## E;        \
>>>       movzbl  r2 ## L,r2 ## E;        \
>>>       xorl    OFFSET+8(r8),rc ## E;   \
>>>       xorl    OFFSET+12(r8),rd ## E;  \
>>> -     xorl    TAB+1024(,r1,4),r3 ## E;\
>>> -     xorl    TAB(,r2,4),r4 ## E;
>>> +     round_xor(TAB+1024, r1, r3 ## E)\
>>> +     round_xor(TAB, r2, r4 ## E)
>>
>> This appears to be adding unconditional overhead to a function that was moved to
>> assembly to improve its performance.
>>

It adds a couple of extra instructions; how much overhead that creates
is hard for me to tell. It would increase the code complexity if
everything were wrapped in ifdefs.

>
> I did some benchmarking on this code a while ago and, interestingly,
> it was slower than the generic C implementation (on a Pentium E2200),
> so we may want to consider whether we still need this driver in the
> first place.

Interesting.

-- 
Thomas

  parent reply	other threads:[~2017-10-20 14:48 UTC|newest]

Thread overview: 176+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-11 20:30 [PATCH v1 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
2017-10-11 20:30 ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-20  8:24   ` Ingo Molnar
2017-10-20  8:24     ` [kernel-hardening] " Ingo Molnar
2017-10-20  8:24     ` Ingo Molnar
2017-10-20  8:28     ` Ard Biesheuvel
2017-10-20  8:28       ` [kernel-hardening] " Ard Biesheuvel
2017-10-20 14:48       ` Thomas Garnier via Virtualization
2017-10-20 14:48       ` Thomas Garnier [this message]
2017-10-20 14:48         ` [kernel-hardening] " Thomas Garnier
2017-10-20 14:48         ` Thomas Garnier
2017-10-20 14:48       ` Thomas Garnier
2017-10-20  8:28     ` Ard Biesheuvel
2017-10-20  8:24   ` Ingo Molnar
2017-10-20  8:24   ` Ingo Molnar
2017-10-11 20:30 ` [PATCH v1 02/27] x86: Use symbol name on bug table " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 03/27] x86: Use symbol name in jump " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 04/27] x86: Add macro to get symbol address " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 05/27] x86: relocate_kernel - Adapt assembly " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 06/27] x86/entry/64: " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-20  8:26   ` Ingo Molnar
2017-10-20  8:26     ` [kernel-hardening] " Ingo Molnar
2017-10-20 14:47     ` Thomas Garnier via Virtualization
2017-10-20 14:47     ` Thomas Garnier
2017-10-20 14:47       ` [kernel-hardening] " Thomas Garnier
2017-10-20 14:47       ` Thomas Garnier
2017-10-20 15:20       ` Ingo Molnar
2017-10-20 15:20       ` Ingo Molnar
2017-10-20 15:20         ` [kernel-hardening] " Ingo Molnar
2017-10-20 16:27         ` Andy Lutomirski
2017-10-20 16:27         ` Andy Lutomirski
2017-10-20 16:27         ` Andy Lutomirski
2017-10-20 16:27           ` [kernel-hardening] " Andy Lutomirski
2017-10-20 16:27           ` Andy Lutomirski
2017-10-20 17:52         ` Andy Lutomirski
2017-10-20 17:52         ` Andy Lutomirski
2017-10-20 17:52           ` [kernel-hardening] " Andy Lutomirski
2017-10-20 17:52           ` Andy Lutomirski
2017-10-20 17:52         ` Andy Lutomirski
2017-10-20 14:47     ` Thomas Garnier
2017-10-20  8:26   ` Ingo Molnar
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 07/27] x86: pm-trace - " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 08/27] x86/CPU: " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 09/27] x86/acpi: " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 10/27] x86/boot/64: " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 11/27] x86/power/64: " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 12/27] x86/paravirt: " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 13/27] x86/boot/64: Use _text in a global " Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 14/27] x86/percpu: Adapt percpu " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 15/27] compiler: Option to default to hidden symbols Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-12 20:02   ` Luis R. Rodriguez
2017-10-12 20:02     ` [kernel-hardening] " Luis R. Rodriguez
2017-10-18 23:15     ` Thomas Garnier via Virtualization
2017-10-18 23:15     ` Thomas Garnier
2017-10-18 23:15     ` Thomas Garnier
2017-10-18 23:15       ` [kernel-hardening] " Thomas Garnier
2017-10-18 23:15       ` Thomas Garnier
2017-10-19 19:38       ` Luis R. Rodriguez
2017-10-19 19:38         ` [kernel-hardening] " Luis R. Rodriguez
2017-10-19 19:38       ` Luis R. Rodriguez
2017-10-12 20:02   ` Luis R. Rodriguez
2017-10-11 20:30 ` [PATCH v1 16/27] x86/relocs: Handle PIE relocations Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 17/27] xen: Adapt assembly for PIE support Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 18/27] kvm: " Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 19/27] x86: Support global stack cookie Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 20/27] x86/ftrace: Adapt function tracing for PIE support Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 21/27] x86/mm/dump_pagetables: Fix address markers index on x86_64 Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 22/27] x86/modules: Add option to start module section after kernel Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 23/27] x86/modules: Adapt module loading for PIE support Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 24/27] x86/mm: Make the x86 GOT read-only Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 25/27] x86/pie: Add option to build the kernel as PIE Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` [PATCH v1 26/27] x86/relocs: Add option to generate 64-bit relocations Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 20:30 ` Thomas Garnier via Virtualization
2017-10-11 20:30 ` [PATCH v1 27/27] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB Thomas Garnier via Virtualization
2017-10-11 20:30 ` Thomas Garnier
2017-10-11 20:30   ` [kernel-hardening] " Thomas Garnier
2017-10-11 20:30   ` Thomas Garnier
2017-10-11 21:34 ` [PATCH v1 00/27] x86: PIE support and option to extend KASLR randomization Tom Lendacky
2017-10-11 21:34 ` Tom Lendacky
2017-10-11 21:34   ` [kernel-hardening] " Tom Lendacky
2017-10-11 21:34   ` Tom Lendacky
2017-10-12 15:34   ` Thomas Garnier via Virtualization
2017-10-12 15:34   ` Thomas Garnier
2017-10-12 15:34   ` Thomas Garnier
2017-10-12 15:34     ` [kernel-hardening] " Thomas Garnier
2017-10-12 15:34     ` Thomas Garnier
2017-10-12 15:51     ` Markus Trippelsdorf
2017-10-12 15:51     ` Markus Trippelsdorf
2017-10-12 15:51       ` [kernel-hardening] " Markus Trippelsdorf
2017-10-12 16:28     ` Tom Lendacky
2017-10-12 16:28     ` Tom Lendacky
2017-10-12 16:28       ` [kernel-hardening] " Tom Lendacky
2017-10-18 23:17       ` Thomas Garnier via Virtualization
2017-10-18 23:17       ` Thomas Garnier
2017-10-18 23:17       ` Thomas Garnier
2017-10-18 23:17         ` [kernel-hardening] " Thomas Garnier
2017-10-18 23:17         ` Thomas Garnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAJcbSZEhkdy9S3qtN4jFaj2doQsp1ORyHiNmzncDb2+bRatXmw@mail.gmail.com \
    --to=thgarnie@google.com \
    --cc=akataria@vmware.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=arnd@arndb.de \
    --cc=aryabinin@virtuozzo.com \
    --cc=bp@suse.de \
    --cc=chrisw@sous-sol.org \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=jpoimboe@redhat.com \
    --cc=keescook@chromium.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=len.brown@intel.com \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=mka@chromium.org \
    --cc=pavel@ucw.cz \
    --cc=peterz@infradead.org \
    --cc=rjw@rjwysocki.net \
    --cc=rusty@rustcorp.com.au \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.