* [PATCH v2 1/2] crypto: arm/aes-scalar - switch to common rev_l/mov_l macros
2021-03-07 16:54 [PATCH v2 0/2] crypto: arm - clean up redundant helper macros Ard Biesheuvel
@ 2021-03-07 16:54 ` Ard Biesheuvel
2021-03-10 7:09 ` Eric Biggers
2021-03-07 16:54 ` [PATCH v2 2/2] crypto: arm/chacha-scalar - switch to common rev_l macro Ard Biesheuvel
1 sibling, 1 reply; 6+ messages in thread
From: Ard Biesheuvel @ 2021-03-07 16:54 UTC (permalink / raw)
To: linux-crypto
Cc: herbert, Ard Biesheuvel, Geert Uytterhoeven, Nicolas Pitre,
Eric Biggers, Linus Walleij
The scalar AES implementation defines local __rev and __adrl macros that
duplicate the rev_l and mov_l macros now provided by assembler.h. Drop
the local copies and use the common macros instead.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
arch/arm/crypto/aes-cipher-core.S | 42 +++++---------------
1 file changed, 10 insertions(+), 32 deletions(-)
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 472e56d09eea..1da3f41359aa 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -99,28 +99,6 @@
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
.endm
- .macro __rev, out, in
- .if __LINUX_ARM_ARCH__ < 6
- lsl t0, \in, #24
- and t1, \in, #0xff00
- and t2, \in, #0xff0000
- orr \out, t0, \in, lsr #24
- orr \out, \out, t1, lsl #8
- orr \out, \out, t2, lsr #8
- .else
- rev \out, \in
- .endif
- .endm
-
- .macro __adrl, out, sym, c
- .if __LINUX_ARM_ARCH__ < 7
- ldr\c \out, =\sym
- .else
- movw\c \out, #:lower16:\sym
- movt\c \out, #:upper16:\sym
- .endif
- .endm
-
.macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
@@ -133,10 +111,10 @@
ldr r7, [in, #12]
#ifdef CONFIG_CPU_BIG_ENDIAN
- __rev r4, r4
- __rev r5, r5
- __rev r6, r6
- __rev r7, r7
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
#endif
eor r4, r4, r8
@@ -144,7 +122,7 @@
eor r6, r6, r10
eor r7, r7, r11
- __adrl ttab, \ttab
+ mov_l ttab, \ttab
/*
* Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
* L1 cache, assuming cacheline size >= 32. This is a hardening measure
@@ -180,7 +158,7 @@
2: .ifb \ltab
add ttab, ttab, #1
.else
- __adrl ttab, \ltab
+ mov_l ttab, \ltab
// Prefetch inverse S-box for final round; see explanation above
.set i, 0
.rept 256 / 64
@@ -194,10 +172,10 @@
\round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
#ifdef CONFIG_CPU_BIG_ENDIAN
- __rev r4, r4
- __rev r5, r5
- __rev r6, r6
- __rev r7, r7
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
#endif
ldr out, [sp]
--
2.30.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 2/2] crypto: arm/chacha-scalar - switch to common rev_l macro
2021-03-07 16:54 [PATCH v2 0/2] crypto: arm - clean up redundant helper macros Ard Biesheuvel
2021-03-07 16:54 ` [PATCH v2 1/2] crypto: arm/aes-scalar - switch to common rev_l/mov_l macros Ard Biesheuvel
@ 2021-03-07 16:54 ` Ard Biesheuvel
2021-03-10 7:07 ` Eric Biggers
1 sibling, 1 reply; 6+ messages in thread
From: Ard Biesheuvel @ 2021-03-07 16:54 UTC (permalink / raw)
To: linux-crypto
Cc: herbert, Ard Biesheuvel, Geert Uytterhoeven, Nicolas Pitre,
Eric Biggers, Linus Walleij
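Drop the local __rev byte swapping macro and use the common rev_l macro
instead. rev_l needs only a single scratch register, so _le32_bswap_4x
now takes one tmp argument rather than three.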
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
arch/arm/crypto/chacha-scalar-core.S | 43 ++++++--------------
1 file changed, 13 insertions(+), 30 deletions(-)
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
index 2985b80a45b5..083fe1ab96d0 100644
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,32 +41,15 @@
X14 .req r12
X15 .req r14
-.macro __rev out, in, t0, t1, t2
-.if __LINUX_ARM_ARCH__ >= 6
- rev \out, \in
-.else
- lsl \t0, \in, #24
- and \t1, \in, #0xff00
- and \t2, \in, #0xff0000
- orr \out, \t0, \in, lsr #24
- orr \out, \out, \t1, lsl #8
- orr \out, \out, \t2, lsr #8
-.endif
-.endm
-
-.macro _le32_bswap x, t0, t1, t2
+.macro _le32_bswap_4x a, b, c, d, tmp
#ifdef __ARMEB__
- __rev \x, \x, \t0, \t1, \t2
+ rev_l \a, \tmp
+ rev_l \b, \tmp
+ rev_l \c, \tmp
+ rev_l \d, \tmp
#endif
.endm
-.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
- _le32_bswap \a, \t0, \t1, \t2
- _le32_bswap \b, \t0, \t1, \t2
- _le32_bswap \c, \t0, \t1, \t2
- _le32_bswap \d, \t0, \t1, \t2
-.endm
-
.macro __ldrd a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
ldrd \a, \b, [\src, #\offset]
@@ -200,7 +183,7 @@
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
- _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ _le32_bswap_4x X0, X1, X2, X3, r8
ldmia r12!, {r8-r11}
eor X0, X0, r8
eor X1, X1, r9
@@ -216,7 +199,7 @@
ldmia r12!, {X0-X3}
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
- _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ _le32_bswap_4x X4, X5, X6, X7, r8
eor X4, X4, X0
eor X5, X5, X1
eor X6, X6, X2
@@ -231,7 +214,7 @@
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
- _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ _le32_bswap_4x r0, r1, r6, r7, r8
ldmia r12!, {r8-r11}
eor r0, r0, r8 // x8
eor r1, r1, r9 // x9
@@ -245,7 +228,7 @@
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
- _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ _le32_bswap_4x r2, r3, r4, r5, r9
ldr r9, [sp, #72] // load LEN
eor r2, r2, r0 // x12
eor r3, r3, r1 // x13
@@ -301,7 +284,7 @@
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
- _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ _le32_bswap_4x X0, X1, X2, X3, r8
stmia r14!, {X0-X3}
// Save keystream for x4-x7
@@ -311,7 +294,7 @@
add X5, r9, X5, ror #brot
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
- _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ _le32_bswap_4x X4, X5, X6, X7, r8
add r8, sp, #64
stmia r14!, {X4-X7}
@@ -323,7 +306,7 @@
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
- _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ _le32_bswap_4x r0, r1, r6, r7, r8
stmia r14!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 144
__ldrd r10, r11, sp, 152
@@ -331,7 +314,7 @@
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
- _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ _le32_bswap_4x r2, r3, r4, r5, r9
stmia r14, {r2-r5}
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
--
2.30.1
^ permalink raw reply related [flat|nested] 6+ messages in thread