From: Josh Poimboeuf <jpoimboe@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
x86@kernel.org
Cc: linux-kernel@vger.kernel.org, live-patching@vger.kernel.org,
Michal Marek <mmarek@suse.cz>,
Peter Zijlstra <peterz@infradead.org>,
Andy Lutomirski <luto@kernel.org>, Borislav Petkov <bp@alien8.de>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andi Kleen <andi@firstfloor.org>, Pedro Alves <palves@redhat.com>,
Namhyung Kim <namhyung@gmail.com>,
Bernd Petrovitsch <bernd@petrovitsch.priv.at>,
Chris J Arges <chris.j.arges@canonical.com>,
Andrew Morton <akpm@linux-foundation.org>,
Jiri Slaby <jslaby@suse.cz>,
Arnaldo Carvalho de Melo <acme@kernel.org>
Subject: [PATCH 13/33] x86/asm/crypto: Simplify stack usage in sha-mb functions
Date: Thu, 21 Jan 2016 16:49:17 -0600 [thread overview]
Message-ID: <9402e4d87580d6b2376ed95f67b84bdcce3c830e.1453405861.git.jpoimboe@redhat.com> (raw)
In-Reply-To: <cover.1453405861.git.jpoimboe@redhat.com>
sha1_mb_mgr_flush_avx2() and sha1_mb_mgr_submit_avx2() both allocate a
lot of stack space which is never used. Also, many of the registers
being saved aren't being clobbered so there's no need to save them.
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S | 32 ++----------------------
arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S | 29 +++------------------
2 files changed, 6 insertions(+), 55 deletions(-)
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
index 85c4e1c..672eaeb 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
@@ -86,16 +86,6 @@
#define extra_blocks %arg2
#define p %arg2
-
-# STACK_SPACE needs to be an odd multiple of 8
-_XMM_SAVE_SIZE = 10*16
-_GPR_SAVE_SIZE = 8*8
-_ALIGN_SIZE = 8
-
-_XMM_SAVE = 0
-_GPR_SAVE = _XMM_SAVE + _XMM_SAVE_SIZE
-STACK_SPACE = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE
-
.macro LABEL prefix n
\prefix\n\():
.endm
@@ -113,16 +103,7 @@ offset = \_offset
# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
# arg 1 : rcx : state
ENTRY(sha1_mb_mgr_flush_avx2)
- mov %rsp, %r10
- sub $STACK_SPACE, %rsp
- and $~31, %rsp
- mov %rbx, _GPR_SAVE(%rsp)
- mov %r10, _GPR_SAVE+8*1(%rsp) #save rsp
- mov %rbp, _GPR_SAVE+8*3(%rsp)
- mov %r12, _GPR_SAVE+8*4(%rsp)
- mov %r13, _GPR_SAVE+8*5(%rsp)
- mov %r14, _GPR_SAVE+8*6(%rsp)
- mov %r15, _GPR_SAVE+8*7(%rsp)
+ push %rbx
# If bit (32+3) is set, then all lanes are empty
mov _unused_lanes(state), unused_lanes
@@ -230,16 +211,7 @@ len_is_0:
mov tmp2_w, offset(job_rax)
return:
-
- mov _GPR_SAVE(%rsp), %rbx
- mov _GPR_SAVE+8*1(%rsp), %r10 #saved rsp
- mov _GPR_SAVE+8*3(%rsp), %rbp
- mov _GPR_SAVE+8*4(%rsp), %r12
- mov _GPR_SAVE+8*5(%rsp), %r13
- mov _GPR_SAVE+8*6(%rsp), %r14
- mov _GPR_SAVE+8*7(%rsp), %r15
- mov %r10, %rsp
-
+ pop %rbx
ret
return_null:
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
index 2ab9560..a5a14c62 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
@@ -94,25 +94,12 @@ DWORD_tmp = %r9d
lane_data = %r10
-# STACK_SPACE needs to be an odd multiple of 8
-STACK_SPACE = 8*8 + 16*10 + 8
-
# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
# arg 1 : rcx : state
# arg 2 : rdx : job
ENTRY(sha1_mb_mgr_submit_avx2)
-
- mov %rsp, %r10
- sub $STACK_SPACE, %rsp
- and $~31, %rsp
-
- mov %rbx, (%rsp)
- mov %r10, 8*2(%rsp) #save old rsp
- mov %rbp, 8*3(%rsp)
- mov %r12, 8*4(%rsp)
- mov %r13, 8*5(%rsp)
- mov %r14, 8*6(%rsp)
- mov %r15, 8*7(%rsp)
+ push %rbx
+ push %rbp
mov _unused_lanes(state), unused_lanes
mov unused_lanes, lane
@@ -203,16 +190,8 @@ len_is_0:
movl DWORD_tmp, _result_digest+1*16(job_rax)
return:
-
- mov (%rsp), %rbx
- mov 8*2(%rsp), %r10 #save old rsp
- mov 8*3(%rsp), %rbp
- mov 8*4(%rsp), %r12
- mov 8*5(%rsp), %r13
- mov 8*6(%rsp), %r14
- mov 8*7(%rsp), %r15
- mov %r10, %rsp
-
+ pop %rbp
+ pop %rbx
ret
return_null:
--
2.4.3
next prev parent reply other threads:[~2016-01-21 23:01 UTC|newest]
Thread overview: 133+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-21 22:49 [PATCH 00/33] Compile-time stack metadata validation Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 01/33] x86/stacktool: " Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 02/33] kbuild/stacktool: Add CONFIG_STACK_VALIDATION option Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 03/33] x86/stacktool: Enable stacktool on x86_64 Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 04/33] x86/stacktool: Add STACKTOOL_IGNORE_FUNC macro Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 05/33] x86/xen: Add stack frame dependency to hypercall inline asm calls Josh Poimboeuf
2016-02-23 8:55 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:45 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 06/33] x86/asm/xen: Set ELF function type for xen_adjust_exception_frame() Josh Poimboeuf
2016-02-23 8:56 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:45 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 07/33] x86/asm/xen: Create stack frames in xen-asm.S Josh Poimboeuf
2016-02-23 8:56 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:45 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 08/33] x86/paravirt: Add stack frame dependency to PVOP inline asm calls Josh Poimboeuf
2016-02-23 8:56 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:46 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 09/33] x86/paravirt: Create a stack frame in PV_CALLEE_SAVE_REGS_THUNK Josh Poimboeuf
2016-02-23 8:57 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:46 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 10/33] x86/amd: Set ELF function type for vide() Josh Poimboeuf
2016-02-23 8:57 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:46 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 11/33] x86/asm/crypto: Move .Lbswap_mask data to .rodata section Josh Poimboeuf
2016-02-23 8:58 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:47 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 12/33] x86/asm/crypto: Move jump_table " Josh Poimboeuf
2016-02-23 8:58 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:47 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` Josh Poimboeuf [this message]
2016-02-23 8:59 ` [tip:x86/debug] x86/asm/crypto: Simplify stack usage in sha-mb functions tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:47 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 14/33] x86/asm/crypto: Don't use rbp as a scratch register Josh Poimboeuf
2016-02-23 8:59 ` [tip:x86/debug] x86/asm/crypto: Don't use RBP " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:48 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 15/33] x86/asm/crypto: Create stack frames in crypto functions Josh Poimboeuf
2016-02-23 8:59 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:48 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 16/33] x86/asm/entry: Create stack frames in thunk functions Josh Poimboeuf
2016-02-23 9:00 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:48 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 17/33] x86/asm/acpi: Create a stack frame in do_suspend_lowlevel() Josh Poimboeuf
2016-02-23 9:00 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-23 11:39 ` Pavel Machek
2016-02-25 5:49 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 18/33] x86/asm: Create stack frames in rwsem functions Josh Poimboeuf
2016-02-23 9:01 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:49 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 19/33] x86/asm/efi: Create a stack frame in efi_call() Josh Poimboeuf
2016-02-23 9:01 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:49 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 20/33] x86/asm/power: Create stack frames in hibernate_asm_64.S Josh Poimboeuf
2016-02-23 9:01 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:50 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 21/33] x86/uaccess: Add stack frame output operand in get_user inline asm Josh Poimboeuf
2016-02-23 9:02 ` [tip:x86/debug] x86/uaccess: Add stack frame output operand in get_user() " tip-bot for Chris J Arges <tipbot@zytor.com>
2016-02-25 5:50 ` tip-bot for Chris J Arges
2016-01-21 22:49 ` [PATCH 22/33] x86/asm/bpf: Annotate callable functions Josh Poimboeuf
2016-02-23 9:02 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:50 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 23/33] x86/asm/bpf: Create stack frames in bpf_jit.S Josh Poimboeuf
2016-01-22 2:44 ` Alexei Starovoitov
2016-01-22 3:55 ` Josh Poimboeuf
2016-01-22 4:18 ` Alexei Starovoitov
2016-01-22 7:36 ` Ingo Molnar
2016-01-22 15:58 ` Josh Poimboeuf
2016-01-22 17:18 ` Alexei Starovoitov
2016-01-22 17:36 ` Josh Poimboeuf
2016-01-22 17:40 ` Alexei Starovoitov
2016-02-23 9:03 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:51 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 24/33] x86/kprobes: Get rid of kretprobe_trampoline_holder() Josh Poimboeuf
2016-01-21 23:42 ` 平松雅巳 / HIRAMATU,MASAMI
2016-02-23 9:03 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:51 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 25/33] x86/kvm: Set ELF function type for fastop functions Josh Poimboeuf
2016-01-22 10:05 ` Paolo Bonzini
2016-02-23 9:03 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:51 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 26/33] x86/kvm: Add stack frame dependency to test_cc() inline asm Josh Poimboeuf
2016-01-22 10:05 ` Paolo Bonzini
2016-01-22 16:02 ` Josh Poimboeuf
2016-01-22 16:16 ` [PATCH v16.1 26/33] x86/kvm: Make test_cc() always inline Josh Poimboeuf
2016-02-23 9:04 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:52 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 27/33] watchdog/hpwdt: Create stack frame in asminline_call() Josh Poimboeuf
2016-02-23 9:04 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:52 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 28/33] x86/locking: Create stack frame in PV unlock Josh Poimboeuf
2016-02-23 9:05 ` [tip:x86/debug] " tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:52 ` tip-bot for Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 29/33] x86/stacktool: Add directory and file whitelists Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 30/33] x86/xen: Add xen_cpuid() to stacktool whitelist Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 31/33] bpf: Add __bpf_prog_run() " Josh Poimboeuf
2016-01-21 22:57 ` Daniel Borkmann
2016-01-22 2:55 ` Alexei Starovoitov
2016-01-22 4:13 ` Josh Poimboeuf
2016-01-22 17:19 ` Alexei Starovoitov
2016-01-21 22:49 ` [PATCH 32/33] sched: Add __schedule() " Josh Poimboeuf
2016-01-21 22:49 ` [PATCH 33/33] x86/kprobes: Add kretprobe_trampoline() " Josh Poimboeuf
2016-01-22 17:43 ` [PATCH 00/33] Compile-time stack metadata validation Chris J Arges
2016-01-22 19:14 ` Josh Poimboeuf
2016-01-22 20:40 ` Chris J Arges
2016-01-22 20:47 ` Josh Poimboeuf
2016-01-22 21:44 ` [PATCH 0/2] A few stacktool warning fixes Chris J Arges
2016-01-22 21:44 ` [PATCH 1/2] tools/stacktool: Add __reiserfs_panic to global_noreturns list Chris J Arges
2016-01-25 15:04 ` Josh Poimboeuf
2016-01-22 21:44 ` [PATCH 2/2] x86/kvm: Add output operand in vmx_handle_external_intr inline asm Chris J Arges
2016-01-25 15:05 ` Josh Poimboeuf
2016-02-23 9:05 ` [tip:x86/debug] " tip-bot for Chris J Arges <tipbot@zytor.com>
2016-02-25 5:53 ` tip-bot for Chris J Arges
2016-02-12 10:36 ` [PATCH 00/33] Compile-time stack metadata validation Jiri Slaby
2016-02-12 10:41 ` Jiri Slaby
2016-02-12 14:45 ` Josh Poimboeuf
2016-02-12 17:10 ` Peter Zijlstra
2016-02-12 18:32 ` Josh Poimboeuf
2016-02-12 18:34 ` Josh Poimboeuf
2016-02-12 20:10 ` Peter Zijlstra
2016-02-15 16:31 ` Josh Poimboeuf
2016-02-15 16:49 ` Peter Zijlstra
[not found] ` <CA+55aFzoPCd_LcSx1FUuEhSBYk2KrfzXGj-Vcn39W5bz=KuZhA@mail.gmail.com>
2016-02-15 20:01 ` Josh Poimboeuf
2016-02-18 17:41 ` [PATCH] sched/x86: Add stack frame dependency to __preempt_schedule[_notrace] Josh Poimboeuf
2016-02-19 12:05 ` Jiri Slaby
2016-02-23 9:05 ` [tip:x86/debug] sched/x86: Add stack frame dependency to __preempt_schedule[_notrace]() tip-bot for Josh Poimboeuf <tipbot@zytor.com>
2016-02-25 5:53 ` tip-bot for Josh Poimboeuf
2016-02-15 20:02 ` [PATCH 00/33] Compile-time stack metadata validation Andi Kleen
2016-02-23 8:14 ` Ingo Molnar
2016-02-23 14:27 ` Arnaldo Carvalho de Melo
2016-02-23 15:07 ` Josh Poimboeuf
2016-02-23 15:28 ` Arnaldo Carvalho de Melo
2016-02-23 15:01 ` Josh Poimboeuf
2016-02-24 7:40 ` Ingo Molnar
2016-02-24 16:32 ` Josh Poimboeuf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9402e4d87580d6b2376ed95f67b84bdcce3c830e.1453405861.git.jpoimboe@redhat.com \
--to=jpoimboe@redhat.com \
--cc=acme@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=bernd@petrovitsch.priv.at \
--cc=bp@alien8.de \
--cc=chris.j.arges@canonical.com \
--cc=hpa@zytor.com \
--cc=jslaby@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=live-patching@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=mmarek@suse.cz \
--cc=namhyung@gmail.com \
--cc=palves@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).