From: Alexander Lobakin <alexandr.lobakin@intel.com>
To: linux-hardening@vger.kernel.org
Cc: "Kristen C Accardi" <kristen.c.accardi@intel.com>,
	Kristen Carlson Accardi <kristen@linux.intel.com>,
	Kees Cook <keescook@chromium.org>,
	Masahiro Yamada <masahiroy@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>, Jessica Yu <jeyu@kernel.org>,
	Nathan Chancellor <nathan@kernel.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Marios Pomonis <pomonis@google.com>,
	Sami Tolvanen <samitolvanen@google.com>,
	Tony Luck <tony.luck@intel.com>, Ard Biesheuvel <ardb@kernel.org>,
	Jesse Brandeburg <jesse.brandeburg@intel.com>,
	Lukasz Czapnik <lukasz.czapnik@intel.com>,
	"Marta A Plantykow" <marta.a.plantykow@intel.com>,
	Michal Kubiak <michal.kubiak@intel.com>,
	Michal Swiatkowski <michal.swiatkowski@intel.com>,
	Alexander Lobakin <alexandr.lobakin@intel.com>,
	linux-kbuild@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-kernel@vger.kernel.org, clang-built-linux@googlegroups.com
Subject: [PATCH v6 kspp-next 18/22] arm64/crypto: conditionally place ASM functions into separate sections
Date: Tue, 31 Aug 2021 16:41:10 +0200	[thread overview]
Message-ID: <20210831144114.154-19-alexandr.lobakin@intel.com> (raw)
In-Reply-To: <20210831144114.154-1-alexandr.lobakin@intel.com>

The LD script generated by the FG-KASLR script contains a size
assertion for the input .text section: if it is not empty, the build
is stopped to plug a potential layout leakage.
As FG-KASLR for modules is meant to be arch-independent, the modular
ASM code of every architecture has to be taken care of so that the
build doesn't break.
This is the ARM64 part.

Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
---
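For readers new to the series: a rough sketch of what the *_SECTION
linkage macros used below might expand to when CONFIG_FG_KASLR is
enabled. The actual definitions are introduced in patch 12/22
("linkage: add macros for putting ASM functions into own sections");
the SYM_PUSH_SECTION()/SYM_POP_SECTION() helper names and their exact
form are assumptions made here purely for illustration:

    /* Illustrative sketch only, not the real patch 12/22 code. */
    #ifdef CONFIG_FG_KASLR
    /* place the following code into its own .text.<name> section */
    # define SYM_PUSH_SECTION(name)	.pushsection .text.name, "ax"
    # define SYM_POP_SECTION()		.popsection
    #else
    # define SYM_PUSH_SECTION(name)
    # define SYM_POP_SECTION()
    #endif

    #define SYM_FUNC_START_SECTION(name)	\
    	SYM_PUSH_SECTION(name) ASM_NL		\
    	SYM_FUNC_START(name)

    #define SYM_FUNC_END_SECTION(name)		\
    	SYM_FUNC_END(name) ASM_NL		\
    	SYM_POP_SECTION()

With every function moved into its own .text.<name> section this way,
the plain input .text section of these objects stays empty, and the
size assertion (e.g. an ASSERT() on SIZEOF(.text)) in the generated
module LD script passes.
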
 arch/arm64/crypto/aes-ce-ccm-core.S   | 16 +++++------
 arch/arm64/crypto/aes-ce-core.S       | 16 +++++------
 arch/arm64/crypto/aes-ce.S            |  4 +--
 arch/arm64/crypto/aes-cipher-core.S   |  8 +++---
 arch/arm64/crypto/aes-modes.S         | 16 +++++------
 arch/arm64/crypto/aes-neon.S          |  4 +--
 arch/arm64/crypto/aes-neonbs-core.S   | 38 +++++++++++++--------------
 arch/arm64/crypto/chacha-neon-core.S  | 18 ++++++-------
 arch/arm64/crypto/crct10dif-ce-core.S | 14 +++++-----
 arch/arm64/crypto/ghash-ce-core.S     | 24 ++++++++---------
 arch/arm64/crypto/nh-neon-core.S      |  4 +--
 arch/arm64/crypto/poly1305-armv8.pl   | 17 ++++++++++++
 arch/arm64/crypto/sha1-ce-core.S      |  4 +--
 arch/arm64/crypto/sha2-ce-core.S      |  4 +--
 arch/arm64/crypto/sha3-ce-core.S      |  4 +--
 arch/arm64/crypto/sha512-armv8.pl     | 11 ++++++++
 arch/arm64/crypto/sha512-ce-core.S    |  4 +--
 arch/arm64/crypto/sm3-ce-core.S       |  4 +--
 arch/arm64/crypto/sm4-ce-core.S       |  4 +--
 19 files changed, 121 insertions(+), 93 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 99a028e298ed..e9a7c7bfecda 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -15,7 +15,7 @@
 	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
 	 *			     u32 *macp, u8 const rk[], u32 rounds);
 	 */
-SYM_FUNC_START(ce_aes_ccm_auth_data)
+SYM_FUNC_START_SECTION(ce_aes_ccm_auth_data)
 	ldr	w8, [x3]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cbz	w8, 1f
@@ -81,13 +81,13 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
 	st1	{v0.16b}, [x0]
 10:	str	w8, [x3]
 	ret
-SYM_FUNC_END(ce_aes_ccm_auth_data)
+SYM_FUNC_END_SECTION(ce_aes_ccm_auth_data)
 
 	/*
 	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
 	 * 			 u32 rounds);
 	 */
-SYM_FUNC_START(ce_aes_ccm_final)
+SYM_FUNC_START_SECTION(ce_aes_ccm_final)
 	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
@@ -121,7 +121,7 @@ SYM_FUNC_START(ce_aes_ccm_final)
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
 	st1	{v0.16b}, [x0]			/* store result */
 	ret
-SYM_FUNC_END(ce_aes_ccm_final)
+SYM_FUNC_END_SECTION(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
 	ldr	x8, [x6, #8]			/* load lower ctr */
@@ -212,10 +212,10 @@ CPU_LE(	rev	x8, x8			)
 	 * 			   u8 const rk[], u32 rounds, u8 mac[],
 	 * 			   u8 ctr[]);
 	 */
-SYM_FUNC_START(ce_aes_ccm_encrypt)
+SYM_FUNC_START_SECTION(ce_aes_ccm_encrypt)
 	aes_ccm_do_crypt	1
-SYM_FUNC_END(ce_aes_ccm_encrypt)
+SYM_FUNC_END_SECTION(ce_aes_ccm_encrypt)
 
-SYM_FUNC_START(ce_aes_ccm_decrypt)
+SYM_FUNC_START_SECTION(ce_aes_ccm_decrypt)
 	aes_ccm_do_crypt	0
-SYM_FUNC_END(ce_aes_ccm_decrypt)
+SYM_FUNC_END_SECTION(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S
index e52e13eb8fdb..abe6ee0501bf 100644
--- a/arch/arm64/crypto/aes-ce-core.S
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -8,7 +8,7 @@
 
 	.arch		armv8-a+crypto
 
-SYM_FUNC_START(__aes_ce_encrypt)
+SYM_FUNC_START_SECTION(__aes_ce_encrypt)
 	sub		w3, w3, #2
 	ld1		{v0.16b}, [x2]
 	ld1		{v1.4s}, [x0], #16
@@ -34,9 +34,9 @@ SYM_FUNC_START(__aes_ce_encrypt)
 	eor		v0.16b, v0.16b, v3.16b
 	st1		{v0.16b}, [x1]
 	ret
-SYM_FUNC_END(__aes_ce_encrypt)
+SYM_FUNC_END_SECTION(__aes_ce_encrypt)
 
-SYM_FUNC_START(__aes_ce_decrypt)
+SYM_FUNC_START_SECTION(__aes_ce_decrypt)
 	sub		w3, w3, #2
 	ld1		{v0.16b}, [x2]
 	ld1		{v1.4s}, [x0], #16
@@ -62,23 +62,23 @@ SYM_FUNC_START(__aes_ce_decrypt)
 	eor		v0.16b, v0.16b, v3.16b
 	st1		{v0.16b}, [x1]
 	ret
-SYM_FUNC_END(__aes_ce_decrypt)
+SYM_FUNC_END_SECTION(__aes_ce_decrypt)
 
 /*
  * __aes_ce_sub() - use the aese instruction to perform the AES sbox
  *                  substitution on each byte in 'input'
  */
-SYM_FUNC_START(__aes_ce_sub)
+SYM_FUNC_START_SECTION(__aes_ce_sub)
 	dup		v1.4s, w0
 	movi		v0.16b, #0
 	aese		v0.16b, v1.16b
 	umov		w0, v0.s[0]
 	ret
-SYM_FUNC_END(__aes_ce_sub)
+SYM_FUNC_END_SECTION(__aes_ce_sub)
 
-SYM_FUNC_START(__aes_ce_invert)
+SYM_FUNC_START_SECTION(__aes_ce_invert)
 	ld1		{v0.4s}, [x1]
 	aesimc		v1.16b, v0.16b
 	st1		{v1.4s}, [x0]
 	ret
-SYM_FUNC_END(__aes_ce_invert)
+SYM_FUNC_END_SECTION(__aes_ce_invert)
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 1dc5bbbfeed2..909d2dcf0907 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -9,8 +9,8 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-#define AES_FUNC_START(func)		SYM_FUNC_START(ce_ ## func)
-#define AES_FUNC_END(func)		SYM_FUNC_END(ce_ ## func)
+#define AES_FUNC_START(func)		SYM_FUNC_START_SECTION(ce_ ## func)
+#define AES_FUNC_END(func)		SYM_FUNC_END_SECTION(ce_ ## func)
 
 	.arch		armv8-a+crypto
 
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index c9d6955f8404..e47c0aef7a7d 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -122,11 +122,11 @@ CPU_BE(	rev		w7, w7		)
 	ret
 	.endm
 
-SYM_FUNC_START(__aes_arm64_encrypt)
+SYM_FUNC_START_SECTION(__aes_arm64_encrypt)
 	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-SYM_FUNC_END(__aes_arm64_encrypt)
+SYM_FUNC_END_SECTION(__aes_arm64_encrypt)
 
+SYM_FUNC_START_SECTION(__aes_arm64_decrypt)
 	.align		5
-SYM_FUNC_START(__aes_arm64_decrypt)
 	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
-SYM_FUNC_END(__aes_arm64_decrypt)
+SYM_FUNC_END_SECTION(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index b495de22bb38..5f7a43fa8438 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -22,26 +22,26 @@
 #define ST5(x...) x
 #endif
 
-SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
+SYM_FUNC_START_LOCAL_SECTION(aes_encrypt_block4x)
 	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
-SYM_FUNC_END(aes_encrypt_block4x)
+SYM_FUNC_END_SECTION(aes_encrypt_block4x)
 
-SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
+SYM_FUNC_START_LOCAL_SECTION(aes_decrypt_block4x)
 	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
-SYM_FUNC_END(aes_decrypt_block4x)
+SYM_FUNC_END_SECTION(aes_decrypt_block4x)
 
 #if MAX_STRIDE == 5
-SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
+SYM_FUNC_START_LOCAL_SECTION(aes_encrypt_block5x)
 	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
 	ret
-SYM_FUNC_END(aes_encrypt_block5x)
+SYM_FUNC_END_SECTION(aes_encrypt_block5x)
 
-SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
+SYM_FUNC_START_LOCAL_SECTION(aes_decrypt_block5x)
 	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
 	ret
-SYM_FUNC_END(aes_decrypt_block5x)
+SYM_FUNC_END_SECTION(aes_decrypt_block5x)
 #endif
 
 	/*
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index e47d3ec2cfb4..9c8d6cccd2cd 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -8,8 +8,8 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
-#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)
+#define AES_FUNC_START(func)		SYM_FUNC_START_SECTION(neon_ ## func)
+#define AES_FUNC_END(func)		SYM_FUNC_END_SECTION(neon_ ## func)
 
 	xtsmask		.req	v7
 	cbciv		.req	v7
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index a3405b8c344b..582343f18ad0 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -380,7 +380,7 @@ ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
 	/*
 	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 	 */
-SYM_FUNC_START(aesbs_convert_key)
+SYM_FUNC_START_SECTION(aesbs_convert_key)
 	ld1		{v7.4s}, [x1], #16		// load round 0 key
 	ld1		{v17.4s}, [x1], #16		// load round 1 key
 
@@ -425,10 +425,10 @@ SYM_FUNC_START(aesbs_convert_key)
 	eor		v17.16b, v17.16b, v7.16b
 	str		q17, [x0]
 	ret
-SYM_FUNC_END(aesbs_convert_key)
+SYM_FUNC_END_SECTION(aesbs_convert_key)
 
+SYM_FUNC_START_LOCAL_SECTION(aesbs_encrypt8)
 	.align		4
-SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	ldr		q9, [bskey], #16		// round 0 key
 	ldr		q8, M0SR
 	ldr		q24, SR
@@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	eor		v2.16b, v2.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-SYM_FUNC_END(aesbs_encrypt8)
+SYM_FUNC_END_SECTION(aesbs_encrypt8)
 
+SYM_FUNC_START_LOCAL_SECTION(aesbs_decrypt8)
 	.align		4
-SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	lsl		x9, rounds, #7
 	add		bskey, bskey, x9
 
@@ -553,7 +553,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	eor		v3.16b, v3.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-SYM_FUNC_END(aesbs_decrypt8)
+SYM_FUNC_END_SECTION(aesbs_decrypt8)
 
 	/*
 	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -619,13 +619,13 @@ SYM_FUNC_END(aesbs_decrypt8)
 	ret
 	.endm
 
+SYM_FUNC_START_SECTION(aesbs_ecb_encrypt)
 	.align		4
-SYM_FUNC_START(aesbs_ecb_encrypt)
 	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-SYM_FUNC_END(aesbs_ecb_encrypt)
+SYM_FUNC_END_SECTION(aesbs_ecb_encrypt)
 
+SYM_FUNC_START_SECTION(aesbs_ecb_decrypt)
 	.align		4
-SYM_FUNC_START(aesbs_ecb_decrypt)
 	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
 SYM_FUNC_END(aesbs_ecb_decrypt)
 
@@ -633,8 +633,8 @@ SYM_FUNC_END(aesbs_ecb_decrypt)
 	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
 	 */
+SYM_FUNC_START_SECTION(aesbs_cbc_decrypt)
 	.align		4
-SYM_FUNC_START(aesbs_cbc_decrypt)
 	frame_push	6
 
 	mov		x19, x0
@@ -718,7 +718,7 @@ SYM_FUNC_START(aesbs_cbc_decrypt)
 
 2:	frame_pop
 	ret
-SYM_FUNC_END(aesbs_cbc_decrypt)
+SYM_FUNC_END_SECTION(aesbs_cbc_decrypt)
 
 	.macro		next_tweak, out, in, const, tmp
 	sshr		\tmp\().2d,  \in\().2d,   #63
@@ -734,7 +734,7 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
 	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
 	 */
-SYM_FUNC_START_LOCAL(__xts_crypt8)
+SYM_FUNC_START_LOCAL_SECTION(__xts_crypt8)
 	mov		x6, #1
 	lsl		x6, x6, x23
 	subs		w23, w23, #8
@@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
 0:	mov		bskey, x21
 	mov		rounds, x22
 	br		x16
-SYM_FUNC_END(__xts_crypt8)
+SYM_FUNC_END_SECTION(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
 	frame_push	6, 64
@@ -851,13 +851,13 @@ SYM_FUNC_END(__xts_crypt8)
 	ret
 	.endm
 
-SYM_FUNC_START(aesbs_xts_encrypt)
+SYM_FUNC_START_SECTION(aesbs_xts_encrypt)
 	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-SYM_FUNC_END(aesbs_xts_encrypt)
+SYM_FUNC_END_SECTION(aesbs_xts_encrypt)
 
-SYM_FUNC_START(aesbs_xts_decrypt)
+SYM_FUNC_START_SECTION(aesbs_xts_decrypt)
 	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-SYM_FUNC_END(aesbs_xts_decrypt)
+SYM_FUNC_END_SECTION(aesbs_xts_decrypt)
 
 	.macro		next_ctr, v
 	mov		\v\().d[1], x8
@@ -871,7 +871,7 @@ SYM_FUNC_END(aesbs_xts_decrypt)
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 	 *		     int rounds, int blocks, u8 iv[], u8 final[])
 	 */
-SYM_FUNC_START(aesbs_ctr_encrypt)
+SYM_FUNC_START_SECTION(aesbs_ctr_encrypt)
 	frame_push	8
 
 	mov		x19, x0
@@ -998,4 +998,4 @@ CPU_LE(	rev		x8, x8		)
 7:	cbz		x25, 8b
 	st1		{v5.16b}, [x25]
 	b		8b
-SYM_FUNC_END(aesbs_ctr_encrypt)
+SYM_FUNC_END_SECTION(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index b70ac76f2610..34a3087055f8 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -23,7 +23,6 @@
 #include <asm/cache.h>
 
 	.text
-	.align		6
 
 /*
  * chacha_permute - permute one block
@@ -36,7 +35,8 @@
  *
  * Clobbers: w3, x10, v4, v12
  */
-SYM_FUNC_START_LOCAL(chacha_permute)
+SYM_FUNC_START_LOCAL_SECTION(chacha_permute)
+	.align		6
 
 	adr_l		x10, ROT8
 	ld1		{v12.4s}, [x10]
@@ -104,9 +104,9 @@ SYM_FUNC_START_LOCAL(chacha_permute)
 	b.ne		.Ldoubleround
 
 	ret
-SYM_FUNC_END(chacha_permute)
+SYM_FUNC_END_SECTION(chacha_permute)
 
-SYM_FUNC_START(chacha_block_xor_neon)
+SYM_FUNC_START_SECTION(chacha_block_xor_neon)
 	// x0: Input state matrix, s
 	// x1: 1 data block output, o
 	// x2: 1 data block input, i
@@ -143,9 +143,9 @@ SYM_FUNC_START(chacha_block_xor_neon)
 
 	ldp		x29, x30, [sp], #16
 	ret
-SYM_FUNC_END(chacha_block_xor_neon)
+SYM_FUNC_END_SECTION(chacha_block_xor_neon)
 
-SYM_FUNC_START(hchacha_block_neon)
+SYM_FUNC_START_SECTION(hchacha_block_neon)
 	// x0: Input state matrix, s
 	// x1: output (8 32-bit words)
 	// w2: nrounds
@@ -163,7 +163,7 @@ SYM_FUNC_START(hchacha_block_neon)
 
 	ldp		x29, x30, [sp], #16
 	ret
-SYM_FUNC_END(hchacha_block_neon)
+SYM_FUNC_END_SECTION(hchacha_block_neon)
 
 	a0		.req	w12
 	a1		.req	w13
@@ -182,8 +182,8 @@ SYM_FUNC_END(hchacha_block_neon)
 	a14		.req	w27
 	a15		.req	w28
 
+SYM_FUNC_START_SECTION(chacha_4block_xor_neon)
 	.align		6
-SYM_FUNC_START(chacha_4block_xor_neon)
 	frame_push	10
 
 	// x0: Input state matrix, s
@@ -790,7 +790,7 @@ CPU_BE(	  rev		a15, a15	)
 	st1		{v28.16b-v31.16b}, [x7]		// overlapping stores
 3:	st1		{v24.16b-v27.16b}, [x1]
 	b		.Lout
-SYM_FUNC_END(chacha_4block_xor_neon)
+SYM_FUNC_END_SECTION(chacha_4block_xor_neon)
 
 	.section	".rodata", "a", %progbits
 	.align		L1_CACHE_SHIFT
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index dce6dcebfca1..54e121a56895 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -131,7 +131,7 @@
 	tbl		bd4.16b, {\bd\().16b}, perm4.16b
 	.endm
 
-SYM_FUNC_START_LOCAL(__pmull_p8_core)
+SYM_FUNC_START_LOCAL_SECTION(__pmull_p8_core)
 .L__pmull_p8_core:
 	ext		t4.8b, ad.8b, ad.8b, #1			// A1
 	ext		t5.8b, ad.8b, ad.8b, #2			// A2
@@ -194,7 +194,7 @@ SYM_FUNC_START_LOCAL(__pmull_p8_core)
 	eor		t4.16b, t4.16b, t5.16b
 	eor		t6.16b, t6.16b, t3.16b
 	ret
-SYM_FUNC_END(__pmull_p8_core)
+SYM_FUNC_END_SECTION(__pmull_p8_core)
 
 	.macro		__pmull_p8, rq, ad, bd, i
 	.ifnc		\bd, fold_consts
@@ -465,21 +465,21 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 //
 // Assumes len >= 16.
 //
-SYM_FUNC_START(crc_t10dif_pmull_p8)
+SYM_FUNC_START_SECTION(crc_t10dif_pmull_p8)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 	crc_t10dif_pmull p8
-SYM_FUNC_END(crc_t10dif_pmull_p8)
+SYM_FUNC_END_SECTION(crc_t10dif_pmull_p8)
 
-	.align		5
 //
 // u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
 //
 // Assumes len >= 16.
 //
-SYM_FUNC_START(crc_t10dif_pmull_p64)
+SYM_FUNC_START_SECTION(crc_t10dif_pmull_p64)
+	.align			5
 	crc_t10dif_pmull	p64
-SYM_FUNC_END(crc_t10dif_pmull_p64)
+SYM_FUNC_END_SECTION(crc_t10dif_pmull_p64)
 
 	.section	".rodata", "a"
 	.align		4
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 7868330dd54e..a69c1d4479db 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -350,13 +350,13 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 	 *			   struct ghash_key const *k, const char *head)
 	 */
-SYM_FUNC_START(pmull_ghash_update_p64)
+SYM_FUNC_START_SECTION(pmull_ghash_update_p64)
 	__pmull_ghash	p64
-SYM_FUNC_END(pmull_ghash_update_p64)
+SYM_FUNC_END_SECTION(pmull_ghash_update_p64)
 
-SYM_FUNC_START(pmull_ghash_update_p8)
+SYM_FUNC_START_SECTION(pmull_ghash_update_p8)
 	__pmull_ghash	p8
-SYM_FUNC_END(pmull_ghash_update_p8)
+SYM_FUNC_END_SECTION(pmull_ghash_update_p8)
 
 	KS0		.req	v8
 	KS1		.req	v9
@@ -602,20 +602,20 @@ CPU_LE(	rev		w8, w8		)
 	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
 	 *			  int rounds, u8 tag)
 	 */
-SYM_FUNC_START(pmull_gcm_encrypt)
+SYM_FUNC_START_SECTION(pmull_gcm_encrypt)
 	pmull_gcm_do_crypt	1
-SYM_FUNC_END(pmull_gcm_encrypt)
+SYM_FUNC_END_SECTION(pmull_gcm_encrypt)
 
 	/*
 	 * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
 	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
 	 *			  int rounds, u8 tag)
 	 */
-SYM_FUNC_START(pmull_gcm_decrypt)
+SYM_FUNC_START_SECTION(pmull_gcm_decrypt)
 	pmull_gcm_do_crypt	0
-SYM_FUNC_END(pmull_gcm_decrypt)
+SYM_FUNC_END_SECTION(pmull_gcm_decrypt)
 
-SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
+SYM_FUNC_START_LOCAL_SECTION(pmull_gcm_ghash_4x)
 	movi		MASK.16b, #0xe1
 	shl		MASK.2d, MASK.2d, #57
 
@@ -696,9 +696,9 @@ SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
 	eor		XL.16b, XL.16b, T2.16b
 
 	ret
-SYM_FUNC_END(pmull_gcm_ghash_4x)
+SYM_FUNC_END_SECTION(pmull_gcm_ghash_4x)
 
-SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
+SYM_FUNC_START_LOCAL_SECTION(pmull_gcm_enc_4x)
 	ld1		{KS0.16b}, [x5]			// load upper counter
 	sub		w10, w8, #4
 	sub		w11, w8, #3
@@ -761,7 +761,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	eor		INP3.16b, INP3.16b, KS3.16b
 
 	ret
-SYM_FUNC_END(pmull_gcm_enc_4x)
+SYM_FUNC_END_SECTION(pmull_gcm_enc_4x)
 
 	.section	".rodata", "a"
 	.align		6
diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S
index 51c0a534ef87..cb354d3f7e7b 100644
--- a/arch/arm64/crypto/nh-neon-core.S
+++ b/arch/arm64/crypto/nh-neon-core.S
@@ -62,7 +62,7 @@
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-SYM_FUNC_START(nh_neon)
+SYM_FUNC_START_SECTION(nh_neon)
 
 	ld1		{K0.4s,K1.4s}, [KEY], #32
 	  movi		PASS0_SUMS.2d, #0
@@ -100,4 +100,4 @@ SYM_FUNC_START(nh_neon)
 	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
 	st1		{T0.16b,T1.16b}, [HASH]
 	ret
-SYM_FUNC_END(nh_neon)
+SYM_FUNC_END_SECTION(nh_neon)
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
index cbc980fb02e3..039e6a9ce68c 100644
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -48,8 +48,12 @@ my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
 
 $code.=<<___;
 #ifndef __KERNEL__
+# define SYM_TEXT_SECTION()
+# define SYM_TEXT_END_SECTION
 # include "arm_arch.h"
 .extern	OPENSSL_armcap_P
+#else
+# include <linux/linkage.h>
 #endif
 
 .text
@@ -58,6 +62,7 @@ $code.=<<___;
 .globl	poly1305_blocks
 .globl	poly1305_emit
 
+SYM_TEXT_SECTION(poly1305_init)
 .globl	poly1305_init
 .type	poly1305_init,%function
 .align	5
@@ -107,7 +112,9 @@ poly1305_init:
 .Lno_key:
 	ret
 .size	poly1305_init,.-poly1305_init
+SYM_TEXT_END_SECTION
 
+SYM_TEXT_SECTION(poly1305_blocks)
 .type	poly1305_blocks,%function
 .align	5
 poly1305_blocks:
@@ -198,7 +205,9 @@ poly1305_blocks:
 .Lno_data:
 	ret
 .size	poly1305_blocks,.-poly1305_blocks
+SYM_TEXT_END_SECTION
 
+SYM_TEXT_SECTION(poly1305_emit)
 .type	poly1305_emit,%function
 .align	5
 poly1305_emit:
@@ -258,6 +267,7 @@ poly1305_emit:
 
 	ret
 .size	poly1305_emit,.-poly1305_emit
+SYM_TEXT_END_SECTION
 ___
 my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
 my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
@@ -270,6 +280,7 @@ my ($in2,$zeros)=("x16","x17");
 my $is_base2_26 = $zeros;		# borrow
 
 $code.=<<___;
+SYM_TEXT_SECTION(poly1305_mult)
 .type	poly1305_mult,%function
 .align	5
 poly1305_mult:
@@ -306,7 +317,9 @@ poly1305_mult:
 
 	ret
 .size	poly1305_mult,.-poly1305_mult
+SYM_TEXT_END_SECTION
 
+SYM_TEXT_SECTION(poly1305_splat)
 .type	poly1305_splat,%function
 .align	4
 poly1305_splat:
@@ -333,7 +346,9 @@ poly1305_splat:
 
 	ret
 .size	poly1305_splat,.-poly1305_splat
+SYM_TEXT_END_SECTION
 
+SYM_TEXT_SECTION(poly1305_blocks_neon)
 #ifdef	__KERNEL__
 .globl	poly1305_blocks_neon
 #endif
@@ -888,6 +903,8 @@ poly1305_blocks_neon:
 .align	5
 .Lzeros:
 .long	0,0,0,0,0,0,0,0
+SYM_TEXT_END_SECTION
+
 .asciz	"Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
 .align	2
 #if !defined(__KERNEL__) && !defined(_WIN64)
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 889ca0f8972b..2ba5f8ea39fc 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -65,7 +65,7 @@
 	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 	 *			 int blocks)
 	 */
-SYM_FUNC_START(sha1_ce_transform)
+SYM_FUNC_START_SECTION(sha1_ce_transform)
 	/* load round constants */
 	loadrc		k0.4s, 0x5a827999, w6
 	loadrc		k1.4s, 0x6ed9eba1, w6
@@ -147,4 +147,4 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	str		dgb, [x0, #16]
 	mov		w0, w2
 	ret
-SYM_FUNC_END(sha1_ce_transform)
+SYM_FUNC_END_SECTION(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 491179922f49..6c1a4a128355 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -75,7 +75,7 @@
 	 *			  int blocks)
 	 */
 	.text
-SYM_FUNC_START(sha2_ce_transform)
+SYM_FUNC_START_SECTION(sha2_ce_transform)
 	/* load round constants */
 	adr_l		x8, .Lsha2_rcon
 	ld1		{ v0.4s- v3.4s}, [x8], #64
@@ -154,4 +154,4 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 3:	st1		{dgav.4s, dgbv.4s}, [x0]
 	mov		w0, w2
 	ret
-SYM_FUNC_END(sha2_ce_transform)
+SYM_FUNC_END_SECTION(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 9c77313f5a60..6105cc815c9a 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -40,7 +40,7 @@
 	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 	 */
 	.text
-SYM_FUNC_START(sha3_ce_transform)
+SYM_FUNC_START_SECTION(sha3_ce_transform)
 	/* load state */
 	add	x8, x0, #32
 	ld1	{ v0.1d- v3.1d}, [x0]
@@ -197,7 +197,7 @@ SYM_FUNC_START(sha3_ce_transform)
 	st1	{v24.1d}, [x0]
 	mov	w0, w2
 	ret
-SYM_FUNC_END(sha3_ce_transform)
+SYM_FUNC_END_SECTION(sha3_ce_transform)
 
 	.section	".rodata", "a"
 	.align		8
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
index 2d8655d5b1af..7952696d3c88 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -195,11 +195,16 @@ ___
 $code.=<<___;
 #ifndef	__KERNEL__
 # include "arm_arch.h"
+# define SYM_TEXT_SECTION()
+# define SYM_TEXT_END_SECTION
+#else
+# include <linux/linkage.h>
 #endif
 
 .text
 
 .extern	OPENSSL_armcap_P
+SYM_TEXT_SECTION($func)
 .globl	$func
 .type	$func,%function
 .align	6
@@ -285,7 +290,9 @@ $code.=<<___;
 	ldp	x29,x30,[sp],#128
 	ret
 .size	$func,.-$func
+SYM_TEXT_END_SECTION
 
+SYM_TEXT_SECTION(K$BITS)
 .align	6
 .type	.LK$BITS,%object
 .LK$BITS:
@@ -354,6 +361,8 @@ $code.=<<___ if ($SZ==4);
 ___
 $code.=<<___;
 .size	.LK$BITS,.-.LK$BITS
+SYM_TEXT_END_SECTION
+
 #ifndef	__KERNEL__
 .align	3
 .LOPENSSL_armcap_P:
@@ -637,6 +646,7 @@ sub body_00_15 () {
 }
 
 $code.=<<___;
+SYM_TEXT_SECTION(sha256_block_neon)
 #ifdef	__KERNEL__
 .globl	sha256_block_neon
 #endif
@@ -736,6 +746,7 @@ $code.=<<___;
 	add	sp,sp,#16*4+16
 	ret
 .size	sha256_block_neon,.-sha256_block_neon
+SYM_TEXT_END_SECTION
 ___
 }
 
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index b6a3a36e15f5..7d34aabb3daa 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -106,7 +106,7 @@
 	 *			  int blocks)
 	 */
 	.text
-SYM_FUNC_START(sha512_ce_transform)
+SYM_FUNC_START_SECTION(sha512_ce_transform)
 	/* load state */
 	ld1		{v8.2d-v11.2d}, [x0]
 
@@ -203,4 +203,4 @@ CPU_LE(	rev64		v19.16b, v19.16b	)
 3:	st1		{v8.2d-v11.2d}, [x0]
 	mov		w0, w2
 	ret
-SYM_FUNC_END(sha512_ce_transform)
+SYM_FUNC_END_SECTION(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S
index ef97d3187cb7..7be60c41e36d 100644
--- a/arch/arm64/crypto/sm3-ce-core.S
+++ b/arch/arm64/crypto/sm3-ce-core.S
@@ -73,7 +73,7 @@
 	 *                       int blocks)
 	 */
 	.text
-SYM_FUNC_START(sm3_ce_transform)
+SYM_FUNC_START_SECTION(sm3_ce_transform)
 	/* load state */
 	ld1		{v8.4s-v9.4s}, [x0]
 	rev64		v8.4s, v8.4s
@@ -131,7 +131,7 @@ CPU_LE(	rev32		v3.16b, v3.16b		)
 	ext		v9.16b, v9.16b, v9.16b, #8
 	st1		{v8.4s-v9.4s}, [x0]
 	ret
-SYM_FUNC_END(sm3_ce_transform)
+SYM_FUNC_END_SECTION(sm3_ce_transform)
 
 	.section	".rodata", "a"
 	.align		3
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 4ac6cfbc5797..5f64ed209a26 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -15,7 +15,7 @@
 	 * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
 	 */
 	.text
-SYM_FUNC_START(sm4_ce_do_crypt)
+SYM_FUNC_START_SECTION(sm4_ce_do_crypt)
 	ld1		{v8.4s}, [x2]
 	ld1		{v0.4s-v3.4s}, [x0], #64
 CPU_LE(	rev32		v8.16b, v8.16b		)
@@ -33,4 +33,4 @@ CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v8.16b, v8.16b		)
 	st1		{v8.4s}, [x1]
 	ret
-SYM_FUNC_END(sm4_ce_do_crypt)
+SYM_FUNC_END_SECTION(sm4_ce_do_crypt)
-- 
2.31.1


Thread overview: 30+ messages
2021-08-31 14:40 [PATCH v6 kspp-next 00/22] Function Granular KASLR Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 01/22] kbuild: Fix TRIM_UNUSED_KSYMS with LTO_CLANG Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 02/22] kbuild: merge vmlinux_link() between the ordinary link and Clang LTO Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 03/22] kbuild: do not remove 'linux' link in scripts/link-vmlinux.sh Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 04/22] kbuild: merge vmlinux_link() between ARCH=um and other architectures Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 05/22] x86: tools/relocs: Support >64K section headers Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 06/22] x86/boot: Allow a "silent" kaslr random byte fetch Alexander Lobakin
2021-08-31 14:40 ` [PATCH v6 kspp-next 07/22] x86: Makefile: Add build and config option for CONFIG_FG_KASLR Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 08/22] Make sure ORC lookup covers the entire _etext - _stext Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 09/22] x86/tools: Add relative relocs for randomized functions Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 10/22] x86/boot/compressed: Avoid duplicate malloc() implementations Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 11/22] x86: Add support for function granular KASLR Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 12/22] linkage: add macros for putting ASM functions into own sections Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 13/22] x86: conditionally place regular ASM functions into separate sections Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 14/22] FG-KASLR: use a scripted approach to handle .text.* sections Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 15/22] kallsyms: Hide layout Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 16/22] livepatch: only match unique symbols when using fgkaslr Alexander Lobakin
2021-09-09 11:53   ` Miroslav Benes
2021-09-10 12:29     ` Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 17/22] x86/boot: allow FG-KASLR to be selected Alexander Lobakin
2021-08-31 14:41 ` Alexander Lobakin [this message]
2021-08-31 14:41 ` [PATCH v6 kspp-next 19/22] module: Reorder functions Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 20/22] module: use a scripted approach for FG-KASLR Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 21/22] Documentation: add a documentation " Alexander Lobakin
2021-08-31 14:41 ` [PATCH v6 kspp-next 22/22] maintainers: add MAINTAINERS entry " Alexander Lobakin
2021-08-31 17:27 ` [PATCH v6 kspp-next 00/22] Function Granular KASLR Kees Cook
2021-09-01 10:36   ` Alexander Lobakin
2021-09-02  1:36     ` Kees Cook
2021-09-03 11:19       ` Alexander Lobakin
2021-09-01 17:16 ` Accardi, Kristen C
