From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752191AbcAUXBA (ORCPT ); Thu, 21 Jan 2016 18:01:00 -0500 Received: from mx1.redhat.com ([209.132.183.28]:35443 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751009AbcAUWvF (ORCPT ); Thu, 21 Jan 2016 17:51:05 -0500 From: Josh Poimboeuf To: Thomas Gleixner , Ingo Molnar , "H. Peter Anvin" , x86@kernel.org Cc: linux-kernel@vger.kernel.org, live-patching@vger.kernel.org, Michal Marek , Peter Zijlstra , Andy Lutomirski , Borislav Petkov , Linus Torvalds , Andi Kleen , Pedro Alves , Namhyung Kim , Bernd Petrovitsch , Chris J Arges , Andrew Morton , Jiri Slaby , Arnaldo Carvalho de Melo Subject: [PATCH 13/33] x86/asm/crypto: Simplify stack usage in sha-mb functions Date: Thu, 21 Jan 2016 16:49:17 -0600 Message-Id: <9402e4d87580d6b2376ed95f67b84bdcce3c830e.1453405861.git.jpoimboe@redhat.com> In-Reply-To: References: Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org sha1_mb_mgr_flush_avx2() and sha1_mb_mgr_submit_avx2() both allocate a lot of stack space which is never used. Also, many of the registers being saved aren't being clobbered so there's no need to save them. Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S | 32 ++---------------------- arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S | 29 +++------------------ 2 files changed, 6 insertions(+), 55 deletions(-) diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S index 85c4e1c..672eaeb 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S @@ -86,16 +86,6 @@ #define extra_blocks %arg2 #define p %arg2 - -# STACK_SPACE needs to be an odd multiple of 8 -_XMM_SAVE_SIZE = 10*16 -_GPR_SAVE_SIZE = 8*8 -_ALIGN_SIZE = 8 - -_XMM_SAVE = 0 -_GPR_SAVE = _XMM_SAVE + _XMM_SAVE_SIZE -STACK_SPACE = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE - .macro LABEL prefix n \prefix\n\(): .endm @@ -113,16 +103,7 @@ offset = \_offset # JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) # arg 1 : rcx : state ENTRY(sha1_mb_mgr_flush_avx2) - mov %rsp, %r10 - sub $STACK_SPACE, %rsp - and $~31, %rsp - mov %rbx, _GPR_SAVE(%rsp) - mov %r10, _GPR_SAVE+8*1(%rsp) #save rsp - mov %rbp, _GPR_SAVE+8*3(%rsp) - mov %r12, _GPR_SAVE+8*4(%rsp) - mov %r13, _GPR_SAVE+8*5(%rsp) - mov %r14, _GPR_SAVE+8*6(%rsp) - mov %r15, _GPR_SAVE+8*7(%rsp) + push %rbx # If bit (32+3) is set, then all lanes are empty mov _unused_lanes(state), unused_lanes @@ -230,16 +211,7 @@ len_is_0: mov tmp2_w, offset(job_rax) return: - - mov _GPR_SAVE(%rsp), %rbx - mov _GPR_SAVE+8*1(%rsp), %r10 #saved rsp - mov _GPR_SAVE+8*3(%rsp), %rbp - mov _GPR_SAVE+8*4(%rsp), %r12 - mov _GPR_SAVE+8*5(%rsp), %r13 - mov _GPR_SAVE+8*6(%rsp), %r14 - mov _GPR_SAVE+8*7(%rsp), %r15 - mov %r10, %rsp - + pop %rbx ret return_null: diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S index 2ab9560..a5a14c62 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S @@ -94,25 +94,12 @@ DWORD_tmp = %r9d lane_data = %r10 -# STACK_SPACE needs to be an odd multiple of 8 -STACK_SPACE = 8*8 + 16*10 + 8 - # JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) # arg 1 : rcx : state # arg 2 : rdx : job ENTRY(sha1_mb_mgr_submit_avx2) - - mov %rsp, %r10 - sub $STACK_SPACE, %rsp - and $~31, %rsp - - mov %rbx, (%rsp) - mov %r10, 8*2(%rsp) #save old rsp - mov %rbp, 8*3(%rsp) - mov %r12, 8*4(%rsp) - mov %r13, 8*5(%rsp) - mov %r14, 8*6(%rsp) - mov %r15, 8*7(%rsp) + push %rbx + push %rbp mov _unused_lanes(state), unused_lanes mov unused_lanes, lane @@ -203,16 +190,8 @@ len_is_0: movl DWORD_tmp, _result_digest+1*16(job_rax) return: - - mov (%rsp), %rbx - mov 8*2(%rsp), %r10 #save old rsp - mov 8*3(%rsp), %rbp - mov 8*4(%rsp), %r12 - mov 8*5(%rsp), %r13 - mov 8*6(%rsp), %r14 - mov 8*7(%rsp), %r15 - mov %r10, %rsp - + pop %rbp + pop %rbx ret return_null: -- 2.4.3