From: "Jason A. Donenfeld" <Jason@zx2c4.com> To: linux-kernel@vger.kernel.org, netdev@vger.kernel.org, davem@davemloft.net, gregkh@linuxfoundation.org Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>, Russell King <linux@armlinux.org.uk>, linux-arm-kernel@lists.infradead.org, Samuel Neves <sneves@dei.uc.pt>, Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>, Andy Lutomirski <luto@kernel.org>, Andrew Morton <akpm@linux-foundation.org>, Linus Torvalds <torvalds@linux-foundation.org>, kernel-hardening@lists.openwall.com, linux-crypto@vger.kernel.org Subject: [PATCH net-next v7 08/28] zinc: port Andy Polyakov's ChaCha20 ARM and ARM64 implementations Date: Sat, 6 Oct 2018 04:56:49 +0200 [thread overview] Message-ID: <20181006025709.4019-9-Jason@zx2c4.com> (raw) In-Reply-To: <20181006025709.4019-1-Jason@zx2c4.com> These port and prepare Andy Polyakov's implementations for the kernel, but don't actually wire up any of the code yet. The wiring will be done in a subsequent commit, since we'll need to merge these implementations with another one. We make a few small changes to the assembly: - Entries and exits use the proper kernel convention macro. - CPU feature checking is done in C by the glue code, so that has been removed from the assembly. - The function names have been renamed to fit kernel conventions. - Labels have been renamed (prefixed with .L) to fit kernel conventions. - Constants have been rearranged so that they are closer to the code that is using them. [ARM only] - The neon code can jump to the scalar code when it makes sense to do so. - The neon_512 function as a separate function has been removed, leaving the decision up to the main neon entry point. [ARM64 only] Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Cc: Russell King <linux@armlinux.org.uk> Cc: linux-arm-kernel@lists.infradead.org Cc: Samuel Neves <sneves@dei.uc.pt> Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: kernel-hardening@lists.openwall.com Cc: linux-crypto@vger.kernel.org --- lib/zinc/chacha20/chacha20-arm-cryptogams.S | 367 +++++++++--------- lib/zinc/chacha20/chacha20-arm64-cryptogams.S | 75 ++-- 2 files changed, 202 insertions(+), 240 deletions(-) diff --git a/lib/zinc/chacha20/chacha20-arm-cryptogams.S b/lib/zinc/chacha20/chacha20-arm-cryptogams.S index 05a3a9e6e93f..770bab469171 100644 --- a/lib/zinc/chacha20/chacha20-arm-cryptogams.S +++ b/lib/zinc/chacha20/chacha20-arm-cryptogams.S @@ -1,9 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from CRYPTOGAMS. */ -#include "arm_arch.h" +#include <linux/linkage.h> .text #if defined(__thumb2__) || defined(__clang__) @@ -24,48 +27,25 @@ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral .Lone: .long 1,0,0,0 -.Lrot8: -.long 0x02010003,0x06050407 -#if __ARM_MAX_ARCH__>=7 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.LChaCha20_ctr32 -#else .word -1 -#endif -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function .align 5 -ChaCha20_ctr32: -.LChaCha20_ctr32: +ENTRY(chacha20_arm) ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -#if __ARM_ARCH__<7 && !defined(__thumb2__) - sub r14,pc,#16 @ ChaCha20_ctr32 -#else - adr r14,.LChaCha20_ctr32 -#endif cmp r2,#0 @ len==0? -#ifdef __thumb2__ +#ifdef __thumb2__ itt eq #endif addeq sp,sp,#4*3 - beq .Lno_data -#if __ARM_MAX_ARCH__>=7 - cmp r2,#192 @ test len - bls .Lshort - ldr r4,[r14,#-24] - ldr r4,[r14,r4] -# ifdef __APPLE__ - ldr r4,[r4] -# endif - tst r4,#ARMV7_NEON - bne .LChaCha20_neon -.Lshort: -#endif + beq .Lno_data_arm ldmia r12,{r4-r7} @ load counter and nonce sub sp,sp,#4*(16) @ off-load area - sub r14,r14,#64 @ .Lsigma +#if __LINUX_ARM_ARCH__ < 7 && !defined(__thumb2__) + sub r14,pc,#100 @ .Lsigma +#else + adr r14,.Lsigma @ .Lsigma +#endif stmdb sp!,{r4-r7} @ copy counter and nonce ldmia r3,{r4-r11} @ load key ldmia r14,{r0-r3} @ load sigma @@ -191,7 +171,7 @@ ChaCha20_ctr32: @ rx and second half at sp+4*(16+8) cmp r11,#64 @ done yet? -#ifdef __thumb2__ +#ifdef __thumb2__ itete lo #endif addlo r12,sp,#4*(0) @ shortcut or ... @@ -202,49 +182,49 @@ ChaCha20_ctr32: ldr r8,[sp,#4*(0)] @ load key material ldr r9,[sp,#4*(1)] -#if __ARM_ARCH__>=6 || !defined(__ARMEB__) -# if __ARM_ARCH__<7 +#if __LINUX_ARM_ARCH__ >= 6 || !defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ < 7 orr r10,r12,r14 tst r10,#3 @ are input and output aligned? ldr r10,[sp,#4*(2)] bne .Lunaligned cmp r11,#64 @ restore flags -# else +#else ldr r10,[sp,#4*(2)] -# endif +#endif ldr r11,[sp,#4*(3)] add r0,r0,r8 @ accumulate key material add r1,r1,r9 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] add r2,r2,r10 add r3,r3,r11 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r0,r0,r8 @ xor with input eorhs r1,r1,r9 add r8,sp,#4*(4) str r0,[r14],#16 @ store output -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r2,r2,r10 eorhs r3,r3,r11 ldmia r8,{r8-r11} @ load key material @@ -254,34 +234,34 @@ ChaCha20_ctr32: add r4,r8,r4,ror#13 @ accumulate key material add r5,r9,r5,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] add r6,r10,r6,ror#13 add r7,r11,r7,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r4,r4,r8 eorhs r5,r5,r9 add r8,sp,#4*(8) str r4,[r14],#16 @ store output -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r6,r6,r10 eorhs r7,r7,r11 str r5,[r14,#-12] @@ -294,39 +274,39 @@ ChaCha20_ctr32: add r0,r0,r8 @ accumulate key material add r1,r1,r9 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it add r2,r2,r10 add r3,r3,r11 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r0,r0,r8 eorhs r1,r1,r9 add r8,sp,#4*(12) str r0,[r14],#16 @ store output -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r2,r2,r10 eorhs r3,r3,r11 str r1,[r14,#-12] @@ -336,79 +316,79 @@ ChaCha20_ctr32: add r4,r8,r4,ror#24 @ accumulate key material add r5,r9,r5,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif addhi r8,r8,#1 @ next counter value strhi r8,[sp,#4*(12)] @ save next counter value -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] add r6,r10,r6,ror#24 add r7,r11,r7,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r4,r4,r8 eorhs r5,r5,r9 -# ifdef __thumb2__ +#ifdef __thumb2__ it ne -# endif +#endif ldrne r8,[sp,#4*(32+2)] @ re-load len -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r6,r6,r10 eorhs r7,r7,r11 str r4,[r14],#16 @ store output str r5,[r14,#-12] -# ifdef __thumb2__ +#ifdef __thumb2__ it hs -# endif +#endif subhs r11,r8,#64 @ len-=64 str r6,[r14,#-8] str r7,[r14,#-4] bhi .Loop_outer beq .Ldone -# if __ARM_ARCH__<7 +#if __LINUX_ARM_ARCH__ < 7 b .Ltail .align 4 .Lunaligned: @ unaligned endian-neutral path cmp r11,#64 @ restore flags -# endif #endif -#if __ARM_ARCH__<7 +#endif +#if __LINUX_ARM_ARCH__ < 7 ldr r11,[sp,#4*(3)] add r0,r8,r0 @ accumulate key material add r1,r9,r1 add r2,r10,r2 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -416,53 +396,53 @@ ChaCha20_ctr32: eor r0,r8,r0 @ xor with input (or zero) eor r1,r9,r1 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r2,r10,r2 strb r0,[r14],#16 @ store output eor r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r1,[r14,#-12] eor r0,r8,r0,lsr#8 strb r2,[r14,#-8] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r3,[r14,#-4] eor r2,r10,r2,lsr#8 strb r0,[r14,#-15] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r1,[r14,#-11] eor r0,r8,r0,lsr#8 strb r2,[r14,#-7] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r3,[r14,#-3] eor r2,r10,r2,lsr#8 strb r0,[r14,#-14] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r1,[r14,#-10] @@ -482,18 +462,18 @@ ChaCha20_ctr32: add r4,r8,r4,ror#13 @ accumulate key material add r5,r9,r5,ror#13 add r6,r10,r6,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r7,r11,r7,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -501,53 +481,53 @@ ChaCha20_ctr32: eor r4,r8,r4 @ xor with input (or zero) eor r5,r9,r5 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r6,r10,r6 strb r4,[r14],#16 @ store output eor r7,r11,r7 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r5,[r14,#-12] eor r4,r8,r4,lsr#8 strb r6,[r14,#-8] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r7,[r14,#-4] eor r6,r10,r6,lsr#8 strb r4,[r14,#-15] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r5,[r14,#-11] eor r4,r8,r4,lsr#8 strb r6,[r14,#-7] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r7,[r14,#-3] eor r6,r10,r6,lsr#8 strb r4,[r14,#-14] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r5,[r14,#-10] @@ -564,26 +544,26 @@ ChaCha20_ctr32: add r8,sp,#4*(4+4) ldmia r8,{r8-r11} @ load key material ldmia r0,{r0-r7} @ load second half -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif strhi r10,[sp,#4*(16+10)] @ copy "rx" strhi r11,[sp,#4*(16+11)] @ copy "rx" add r0,r8,r0 @ accumulate key material add r1,r9,r1 add r2,r10,r2 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -591,53 +571,53 @@ ChaCha20_ctr32: eor r0,r8,r0 @ xor with input (or zero) eor r1,r9,r1 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r2,r10,r2 strb r0,[r14],#16 @ store output eor r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r1,[r14,#-12] eor r0,r8,r0,lsr#8 strb r2,[r14,#-8] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r3,[r14,#-4] eor r2,r10,r2,lsr#8 strb r0,[r14,#-15] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r1,[r14,#-11] eor r0,r8,r0,lsr#8 strb r2,[r14,#-7] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r3,[r14,#-3] eor r2,r10,r2,lsr#8 strb r0,[r14,#-14] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r1,[r14,#-10] @@ -654,25 +634,25 @@ ChaCha20_ctr32: add r8,sp,#4*(4+8) ldmia r8,{r8-r11} @ load key material add r4,r8,r4,ror#24 @ accumulate key material -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif addhi r8,r8,#1 @ next counter value strhi r8,[sp,#4*(12)] @ save next counter value add r5,r9,r5,ror#24 add r6,r10,r6,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r7,r11,r7,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -680,53 +660,53 @@ ChaCha20_ctr32: eor r4,r8,r4 @ xor with input (or zero) eor r5,r9,r5 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r6,r10,r6 strb r4,[r14],#16 @ store output eor r7,r11,r7 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r5,[r14,#-12] eor r4,r8,r4,lsr#8 strb r6,[r14,#-8] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r7,[r14,#-4] eor r6,r10,r6,lsr#8 strb r4,[r14,#-15] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r5,[r14,#-11] eor r4,r8,r4,lsr#8 strb r6,[r14,#-7] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r7,[r14,#-3] eor r6,r10,r6,lsr#8 strb r4,[r14,#-14] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r5,[r14,#-10] @@ -740,13 +720,13 @@ ChaCha20_ctr32: eor r7,r11,r7,lsr#8 strb r6,[r14,#-5] strb r7,[r14,#-1] -# ifdef __thumb2__ +#ifdef __thumb2__ it ne -# endif +#endif ldrne r8,[sp,#4*(32+2)] @ re-load len -# ifdef __thumb2__ +#ifdef __thumb2__ it hs -# endif +#endif subhs r11,r8,#64 @ len-=64 bhi .Loop_outer @@ -768,20 +748,33 @@ ChaCha20_ctr32: .Ldone: add sp,sp,#4*(32+3) -.Lno_data: +.Lno_data_arm: ldmia sp!,{r4-r11,pc} -.size ChaCha20_ctr32,.-ChaCha20_ctr32 -#if __ARM_MAX_ARCH__>=7 +ENDPROC(chacha20_arm) + +#ifdef CONFIG_KERNEL_MODE_NEON +.align 5 +.Lsigma2: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral +.Lone2: +.long 1,0,0,0 +.word -1 + .arch armv7-a .fpu neon -.type ChaCha20_neon,%function .align 5 -ChaCha20_neon: +ENTRY(chacha20_neon) ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -.LChaCha20_neon: - adr r14,.Lsigma + cmp r2,#0 @ len==0? +#ifdef __thumb2__ + itt eq +#endif + addeq sp,sp,#4*3 + beq .Lno_data_neon +.Lchacha20_neon_begin: + adr r14,.Lsigma2 vstmdb sp!,{d8-d15} @ ABI spec says so stmdb sp!,{r0-r3} @@ -1121,12 +1114,12 @@ ChaCha20_neon: ldr r10,[r12,#-8] add r3,r3,r11 ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif +#endif eor r0,r0,r8 @ xor with input add r8,sp,#4*(4) eor r1,r1,r9 @@ -1146,12 +1139,12 @@ ChaCha20_neon: ldr r10,[r12,#-8] add r7,r11,r7,ror#13 ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif eor r4,r4,r8 add r8,sp,#4*(8) eor r5,r5,r9 @@ -1170,24 +1163,24 @@ ChaCha20_neon: ldr r8,[r12],#16 @ load input add r1,r1,r9 ldr r9,[r12,#-12] -# ifdef __thumb2__ +#ifdef __thumb2__ it hi -# endif +#endif strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it add r2,r2,r10 ldr r10,[r12,#-8] -# ifdef __thumb2__ +#ifdef __thumb2__ it hi -# endif +#endif strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it add r3,r3,r11 ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif +#endif eor r0,r0,r8 add r8,sp,#4*(12) eor r1,r1,r9 @@ -1210,16 +1203,16 @@ ChaCha20_neon: add r7,r11,r7,ror#24 ldr r10,[r12,#-8] ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif eor r4,r4,r8 -# ifdef __thumb2__ +#ifdef __thumb2__ it hi -# endif +#endif ldrhi r8,[sp,#4*(32+2)] @ re-load len eor r5,r5,r9 eor r6,r6,r10 @@ -1379,7 +1372,7 @@ ChaCha20_neon: add r6,r10,r6,ror#13 add r7,r11,r7,ror#13 ldmia r8,{r8-r11} @ load key material -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 @@ -1388,7 +1381,7 @@ ChaCha20_neon: rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif stmia sp,{r0-r7} add r0,sp,#4*(16+8) @@ -1408,7 +1401,7 @@ ChaCha20_neon: add r6,r10,r6,ror#24 add r7,r11,r7,ror#24 ldr r11,[sp,#4*(32+2)] @ re-load len -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 @@ -1417,7 +1410,7 @@ ChaCha20_neon: rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif stmia r8,{r0-r7} add r10,sp,#4*(0) sub r11,r11,#64*3 @ len-=64*3 @@ -1434,7 +1427,7 @@ ChaCha20_neon: add sp,sp,#4*(32+4) vldmia sp,{d8-d15} add sp,sp,#4*(16+3) +.Lno_data_neon: ldmia sp!,{r4-r11,pc} -.size ChaCha20_neon,.-ChaCha20_neon -.comm OPENSSL_armcap_P,4,4 +ENDPROC(chacha20_neon) #endif diff --git a/lib/zinc/chacha20/chacha20-arm64-cryptogams.S b/lib/zinc/chacha20/chacha20-arm64-cryptogams.S index 4d029bfdad3a..1ae11a5c5a14 100644 --- a/lib/zinc/chacha20/chacha20-arm64-cryptogams.S +++ b/lib/zinc/chacha20/chacha20-arm64-cryptogams.S @@ -1,46 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from CRYPTOGAMS. */ -#include "arm_arch.h" +#include <linux/linkage.h> .text - - - .align 5 .Lsigma: .quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral .Lone: .long 1,0,0,0 -.LOPENSSL_armcap_P: -#ifdef __ILP32__ -.long OPENSSL_armcap_P-. -#else -.quad OPENSSL_armcap_P-. -#endif -.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 2 -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function .align 5 -ChaCha20_ctr32: +ENTRY(chacha20_arm) cbz x2,.Labort - adr x5,.LOPENSSL_armcap_P - cmp x2,#192 - b.lo .Lshort -#ifdef __ILP32__ - ldrsw x6,[x5] -#else - ldr x6,[x5] -#endif - ldr w17,[x6,x5] - tst w17,#ARMV7_NEON - b.ne ChaCha20_neon -.Lshort: stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -56,7 +34,7 @@ ChaCha20_ctr32: ldp x24,x25,[x3] // load key ldp x26,x27,[x3,#16] ldp x28,x30,[x4] // load counter -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ ror x24,x24,#32 ror x25,x25,#32 ror x26,x26,#32 @@ -217,7 +195,7 @@ ChaCha20_ctr32: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -273,7 +251,7 @@ ChaCha20_ctr32: add x15,x15,x16,lsl#32 add x17,x17,x19,lsl#32 add x20,x20,x21,lsl#32 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -309,11 +287,13 @@ ChaCha20_ctr32: ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 ret -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +ENDPROC(chacha20_arm) -.type ChaCha20_neon,%function +#ifdef CONFIG_KERNEL_MODE_NEON .align 5 -ChaCha20_neon: +ENTRY(chacha20_neon) + cbz x2,.Labort_neon + stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -336,7 +316,7 @@ ChaCha20_neon: ldp x28,x30,[x4] // load counter ld1 {v27.4s},[x4] ld1 {v31.4s},[x5] -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev64 v24.4s,v24.4s ror x24,x24,#32 ror x25,x25,#32 @@ -634,7 +614,7 @@ ChaCha20_neon: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -713,7 +693,7 @@ ChaCha20_neon: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -803,19 +783,6 @@ ChaCha20_neon: ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 ret -.size ChaCha20_neon,.-ChaCha20_neon -.type ChaCha20_512_neon,%function -.align 5 -ChaCha20_512_neon: - stp x29,x30,[sp,#-96]! - add x29,sp,#0 - - adr x5,.Lsigma - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] .L512_or_more_neon: sub sp,sp,#128+64 @@ -828,7 +795,7 @@ ChaCha20_512_neon: ldp x28,x30,[x4] // load counter ld1 {v27.4s},[x4] ld1 {v31.4s},[x5] -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev64 v24.4s,v24.4s ror x24,x24,#32 ror x25,x25,#32 @@ -1341,7 +1308,7 @@ ChaCha20_512_neon: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -1855,7 +1822,7 @@ ChaCha20_512_neon: add x1,x1,#64 add v21.4s,v21.4s,v25.4s -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -1969,5 +1936,7 @@ ChaCha20_512_neon: ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 +.Labort_neon: ret -.size ChaCha20_512_neon,.-ChaCha20_512_neon +ENDPROC(chacha20_neon) +#endif -- 2.19.0
WARNING: multiple messages have this Message-ID (diff)
From: Jason@zx2c4.com (Jason A. Donenfeld) To: linux-arm-kernel@lists.infradead.org Subject: [PATCH net-next v7 08/28] zinc: port Andy Polyakov's ChaCha20 ARM and ARM64 implementations Date: Sat, 6 Oct 2018 04:56:49 +0200 [thread overview] Message-ID: <20181006025709.4019-9-Jason@zx2c4.com> (raw) In-Reply-To: <20181006025709.4019-1-Jason@zx2c4.com> These port and prepare Andy Polyakov's implementations for the kernel, but don't actually wire up any of the code yet. The wiring will be done in a subsequent commit, since we'll need to merge these implementations with another one. We make a few small changes to the assembly: - Entries and exits use the proper kernel convention macro. - CPU feature checking is done in C by the glue code, so that has been removed from the assembly. - The function names have been renamed to fit kernel conventions. - Labels have been renamed (prefixed with .L) to fit kernel conventions. - Constants have been rearranged so that they are closer to the code that is using them. [ARM only] - The neon code can jump to the scalar code when it makes sense to do so. - The neon_512 function as a separate function has been removed, leaving the decision up to the main neon entry point. [ARM64 only] Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Cc: Russell King <linux@armlinux.org.uk> Cc: linux-arm-kernel at lists.infradead.org Cc: Samuel Neves <sneves@dei.uc.pt> Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: kernel-hardening at lists.openwall.com Cc: linux-crypto at vger.kernel.org --- lib/zinc/chacha20/chacha20-arm-cryptogams.S | 367 +++++++++--------- lib/zinc/chacha20/chacha20-arm64-cryptogams.S | 75 ++-- 2 files changed, 202 insertions(+), 240 deletions(-) diff --git a/lib/zinc/chacha20/chacha20-arm-cryptogams.S b/lib/zinc/chacha20/chacha20-arm-cryptogams.S index 05a3a9e6e93f..770bab469171 100644 --- a/lib/zinc/chacha20/chacha20-arm-cryptogams.S +++ b/lib/zinc/chacha20/chacha20-arm-cryptogams.S @@ -1,9 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from CRYPTOGAMS. */ -#include "arm_arch.h" +#include <linux/linkage.h> .text #if defined(__thumb2__) || defined(__clang__) @@ -24,48 +27,25 @@ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral .Lone: .long 1,0,0,0 -.Lrot8: -.long 0x02010003,0x06050407 -#if __ARM_MAX_ARCH__>=7 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.LChaCha20_ctr32 -#else .word -1 -#endif -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function .align 5 -ChaCha20_ctr32: -.LChaCha20_ctr32: +ENTRY(chacha20_arm) ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -#if __ARM_ARCH__<7 && !defined(__thumb2__) - sub r14,pc,#16 @ ChaCha20_ctr32 -#else - adr r14,.LChaCha20_ctr32 -#endif cmp r2,#0 @ len==0? -#ifdef __thumb2__ +#ifdef __thumb2__ itt eq #endif addeq sp,sp,#4*3 - beq .Lno_data -#if __ARM_MAX_ARCH__>=7 - cmp r2,#192 @ test len - bls .Lshort - ldr r4,[r14,#-24] - ldr r4,[r14,r4] -# ifdef __APPLE__ - ldr r4,[r4] -# endif - tst r4,#ARMV7_NEON - bne .LChaCha20_neon -.Lshort: -#endif + beq .Lno_data_arm ldmia r12,{r4-r7} @ load counter and nonce sub sp,sp,#4*(16) @ off-load area - sub r14,r14,#64 @ .Lsigma +#if __LINUX_ARM_ARCH__ < 7 && !defined(__thumb2__) + sub r14,pc,#100 @ .Lsigma +#else + adr r14,.Lsigma @ .Lsigma +#endif stmdb sp!,{r4-r7} @ copy counter and nonce ldmia r3,{r4-r11} @ load key ldmia r14,{r0-r3} @ load sigma @@ -191,7 +171,7 @@ ChaCha20_ctr32: @ rx and second half@sp+4*(16+8) cmp r11,#64 @ done yet? -#ifdef __thumb2__ +#ifdef __thumb2__ itete lo #endif addlo r12,sp,#4*(0) @ shortcut or ... @@ -202,49 +182,49 @@ ChaCha20_ctr32: ldr r8,[sp,#4*(0)] @ load key material ldr r9,[sp,#4*(1)] -#if __ARM_ARCH__>=6 || !defined(__ARMEB__) -# if __ARM_ARCH__<7 +#if __LINUX_ARM_ARCH__ >= 6 || !defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ < 7 orr r10,r12,r14 tst r10,#3 @ are input and output aligned? ldr r10,[sp,#4*(2)] bne .Lunaligned cmp r11,#64 @ restore flags -# else +#else ldr r10,[sp,#4*(2)] -# endif +#endif ldr r11,[sp,#4*(3)] add r0,r0,r8 @ accumulate key material add r1,r1,r9 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] add r2,r2,r10 add r3,r3,r11 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r0,r0,r8 @ xor with input eorhs r1,r1,r9 add r8,sp,#4*(4) str r0,[r14],#16 @ store output -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r2,r2,r10 eorhs r3,r3,r11 ldmia r8,{r8-r11} @ load key material @@ -254,34 +234,34 @@ ChaCha20_ctr32: add r4,r8,r4,ror#13 @ accumulate key material add r5,r9,r5,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] add r6,r10,r6,ror#13 add r7,r11,r7,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r4,r4,r8 eorhs r5,r5,r9 add r8,sp,#4*(8) str r4,[r14],#16 @ store output -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r6,r6,r10 eorhs r7,r7,r11 str r5,[r14,#-12] @@ -294,39 +274,39 @@ ChaCha20_ctr32: add r0,r0,r8 @ accumulate key material add r1,r1,r9 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it add r2,r2,r10 add r3,r3,r11 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r0,r0,r8 eorhs r1,r1,r9 add r8,sp,#4*(12) str r0,[r14],#16 @ store output -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r2,r2,r10 eorhs r3,r3,r11 str r1,[r14,#-12] @@ -336,79 +316,79 @@ ChaCha20_ctr32: add r4,r8,r4,ror#24 @ accumulate key material add r5,r9,r5,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif addhi r8,r8,#1 @ next counter value strhi r8,[sp,#4*(12)] @ save next counter value -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r8,[r12],#16 @ load input ldrhs r9,[r12,#-12] add r6,r10,r6,ror#24 add r7,r11,r7,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhs r10,[r12,#-8] ldrhs r11,[r12,#-4] -# if __ARM_ARCH__>=6 && defined(__ARMEB__) +#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__) rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif -# ifdef __thumb2__ +#endif +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r4,r4,r8 eorhs r5,r5,r9 -# ifdef __thumb2__ +#ifdef __thumb2__ it ne -# endif +#endif ldrne r8,[sp,#4*(32+2)] @ re-load len -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif eorhs r6,r6,r10 eorhs r7,r7,r11 str r4,[r14],#16 @ store output str r5,[r14,#-12] -# ifdef __thumb2__ +#ifdef __thumb2__ it hs -# endif +#endif subhs r11,r8,#64 @ len-=64 str r6,[r14,#-8] str r7,[r14,#-4] bhi .Loop_outer beq .Ldone -# if __ARM_ARCH__<7 +#if __LINUX_ARM_ARCH__ < 7 b .Ltail .align 4 .Lunaligned: @ unaligned endian-neutral path cmp r11,#64 @ restore flags -# endif #endif -#if __ARM_ARCH__<7 +#endif +#if __LINUX_ARM_ARCH__ < 7 ldr r11,[sp,#4*(3)] add r0,r8,r0 @ accumulate key material add r1,r9,r1 add r2,r10,r2 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -416,53 +396,53 @@ ChaCha20_ctr32: eor r0,r8,r0 @ xor with input (or zero) eor r1,r9,r1 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r2,r10,r2 strb r0,[r14],#16 @ store output eor r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r1,[r14,#-12] eor r0,r8,r0,lsr#8 strb r2,[r14,#-8] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r3,[r14,#-4] eor r2,r10,r2,lsr#8 strb r0,[r14,#-15] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r1,[r14,#-11] eor r0,r8,r0,lsr#8 strb r2,[r14,#-7] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r3,[r14,#-3] eor r2,r10,r2,lsr#8 strb r0,[r14,#-14] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r1,[r14,#-10] @@ -482,18 +462,18 @@ ChaCha20_ctr32: add r4,r8,r4,ror#13 @ accumulate key material add r5,r9,r5,ror#13 add r6,r10,r6,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r7,r11,r7,ror#13 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -501,53 +481,53 @@ ChaCha20_ctr32: eor r4,r8,r4 @ xor with input (or zero) eor r5,r9,r5 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r6,r10,r6 strb r4,[r14],#16 @ store output eor r7,r11,r7 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r5,[r14,#-12] eor r4,r8,r4,lsr#8 strb r6,[r14,#-8] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r7,[r14,#-4] eor r6,r10,r6,lsr#8 strb r4,[r14,#-15] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r5,[r14,#-11] eor r4,r8,r4,lsr#8 strb r6,[r14,#-7] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r7,[r14,#-3] eor r6,r10,r6,lsr#8 strb r4,[r14,#-14] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r5,[r14,#-10] @@ -564,26 +544,26 @@ ChaCha20_ctr32: add r8,sp,#4*(4+4) ldmia r8,{r8-r11} @ load key material ldmia r0,{r0-r7} @ load second half -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif strhi r10,[sp,#4*(16+10)] @ copy "rx" strhi r11,[sp,#4*(16+11)] @ copy "rx" add r0,r8,r0 @ accumulate key material add r1,r9,r1 add r2,r10,r2 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -591,53 +571,53 @@ ChaCha20_ctr32: eor r0,r8,r0 @ xor with input (or zero) eor r1,r9,r1 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r2,r10,r2 strb r0,[r14],#16 @ store output eor r3,r11,r3 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r1,[r14,#-12] eor r0,r8,r0,lsr#8 strb r2,[r14,#-8] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r3,[r14,#-4] eor r2,r10,r2,lsr#8 strb r0,[r14,#-15] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r1,[r14,#-11] eor r0,r8,r0,lsr#8 strb r2,[r14,#-7] eor r1,r9,r1,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r3,[r14,#-3] eor r2,r10,r2,lsr#8 strb r0,[r14,#-14] eor r3,r11,r3,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r1,[r14,#-10] @@ -654,25 +634,25 @@ ChaCha20_ctr32: add r8,sp,#4*(4+8) ldmia r8,{r8-r11} @ load key material add r4,r8,r4,ror#24 @ accumulate key material -# ifdef __thumb2__ +#ifdef __thumb2__ itt hi -# endif +#endif addhi r8,r8,#1 @ next counter value strhi r8,[sp,#4*(12)] @ save next counter value add r5,r9,r5,ror#24 add r6,r10,r6,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r8,r8,r8 @ zero or ... ldrhsb r8,[r12],#16 @ ... load input eorlo r9,r9,r9 ldrhsb r9,[r12,#-12] add r7,r11,r7,ror#24 -# ifdef __thumb2__ +#ifdef __thumb2__ itete lo -# endif +#endif eorlo r10,r10,r10 ldrhsb r10,[r12,#-8] eorlo r11,r11,r11 @@ -680,53 +660,53 @@ ChaCha20_ctr32: eor r4,r8,r4 @ xor with input (or zero) eor r5,r9,r5 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-15] @ load more input ldrhsb r9,[r12,#-11] eor r6,r10,r6 strb r4,[r14],#16 @ store output eor r7,r11,r7 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-7] ldrhsb r11,[r12,#-3] strb r5,[r14,#-12] eor r4,r8,r4,lsr#8 strb r6,[r14,#-8] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-14] @ load more input ldrhsb r9,[r12,#-10] strb r7,[r14,#-4] eor r6,r10,r6,lsr#8 strb r4,[r14,#-15] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-6] ldrhsb r11,[r12,#-2] strb r5,[r14,#-11] eor r4,r8,r4,lsr#8 strb r6,[r14,#-7] eor r5,r9,r5,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r8,[r12,#-13] @ load more input ldrhsb r9,[r12,#-9] strb r7,[r14,#-3] eor r6,r10,r6,lsr#8 strb r4,[r14,#-14] eor r7,r11,r7,lsr#8 -# ifdef __thumb2__ +#ifdef __thumb2__ itt hs -# endif +#endif ldrhsb r10,[r12,#-5] ldrhsb r11,[r12,#-1] strb r5,[r14,#-10] @@ -740,13 +720,13 @@ ChaCha20_ctr32: eor r7,r11,r7,lsr#8 strb r6,[r14,#-5] strb r7,[r14,#-1] -# ifdef __thumb2__ +#ifdef __thumb2__ it ne -# endif +#endif ldrne r8,[sp,#4*(32+2)] @ re-load len -# ifdef __thumb2__ +#ifdef __thumb2__ it hs -# endif +#endif subhs r11,r8,#64 @ len-=64 bhi .Loop_outer @@ -768,20 +748,33 @@ ChaCha20_ctr32: .Ldone: add sp,sp,#4*(32+3) -.Lno_data: +.Lno_data_arm: ldmia sp!,{r4-r11,pc} -.size ChaCha20_ctr32,.-ChaCha20_ctr32 -#if __ARM_MAX_ARCH__>=7 +ENDPROC(chacha20_arm) + +#ifdef CONFIG_KERNEL_MODE_NEON +.align 5 +.Lsigma2: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral +.Lone2: +.long 1,0,0,0 +.word -1 + .arch armv7-a .fpu neon -.type ChaCha20_neon,%function .align 5 -ChaCha20_neon: +ENTRY(chacha20_neon) ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -.LChaCha20_neon: - adr r14,.Lsigma + cmp r2,#0 @ len==0? +#ifdef __thumb2__ + itt eq +#endif + addeq sp,sp,#4*3 + beq .Lno_data_neon +.Lchacha20_neon_begin: + adr r14,.Lsigma2 vstmdb sp!,{d8-d15} @ ABI spec says so stmdb sp!,{r0-r3} @@ -1121,12 +1114,12 @@ ChaCha20_neon: ldr r10,[r12,#-8] add r3,r3,r11 ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif +#endif eor r0,r0,r8 @ xor with input add r8,sp,#4*(4) eor r1,r1,r9 @@ -1146,12 +1139,12 @@ ChaCha20_neon: ldr r10,[r12,#-8] add r7,r11,r7,ror#13 ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif eor r4,r4,r8 add r8,sp,#4*(8) eor r5,r5,r9 @@ -1170,24 +1163,24 @@ ChaCha20_neon: ldr r8,[r12],#16 @ load input add r1,r1,r9 ldr r9,[r12,#-12] -# ifdef __thumb2__ +#ifdef __thumb2__ it hi -# endif +#endif strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it add r2,r2,r10 ldr r10,[r12,#-8] -# ifdef __thumb2__ +#ifdef __thumb2__ it hi -# endif +#endif strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it add r3,r3,r11 ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 rev r3,r3 -# endif +#endif eor r0,r0,r8 add r8,sp,#4*(12) eor r1,r1,r9 @@ -1210,16 +1203,16 @@ ChaCha20_neon: add r7,r11,r7,ror#24 ldr r10,[r12,#-8] ldr r11,[r12,#-4] -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r4,r4 rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif eor r4,r4,r8 -# ifdef __thumb2__ +#ifdef __thumb2__ it hi -# endif +#endif ldrhi r8,[sp,#4*(32+2)] @ re-load len eor r5,r5,r9 eor r6,r6,r10 @@ -1379,7 +1372,7 @@ ChaCha20_neon: add r6,r10,r6,ror#13 add r7,r11,r7,ror#13 ldmia r8,{r8-r11} @ load key material -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 @@ -1388,7 +1381,7 @@ ChaCha20_neon: rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif stmia sp,{r0-r7} add r0,sp,#4*(16+8) @@ -1408,7 +1401,7 @@ ChaCha20_neon: add r6,r10,r6,ror#24 add r7,r11,r7,ror#24 ldr r11,[sp,#4*(32+2)] @ re-load len -# ifdef __ARMEB__ +#ifdef __ARMEB__ rev r0,r0 rev r1,r1 rev r2,r2 @@ -1417,7 +1410,7 @@ ChaCha20_neon: rev r5,r5 rev r6,r6 rev r7,r7 -# endif +#endif stmia r8,{r0-r7} add r10,sp,#4*(0) sub r11,r11,#64*3 @ len-=64*3 @@ -1434,7 +1427,7 @@ ChaCha20_neon: add sp,sp,#4*(32+4) vldmia sp,{d8-d15} add sp,sp,#4*(16+3) +.Lno_data_neon: ldmia sp!,{r4-r11,pc} -.size ChaCha20_neon,.-ChaCha20_neon -.comm OPENSSL_armcap_P,4,4 +ENDPROC(chacha20_neon) #endif diff --git a/lib/zinc/chacha20/chacha20-arm64-cryptogams.S b/lib/zinc/chacha20/chacha20-arm64-cryptogams.S index 4d029bfdad3a..1ae11a5c5a14 100644 --- a/lib/zinc/chacha20/chacha20-arm64-cryptogams.S +++ b/lib/zinc/chacha20/chacha20-arm64-cryptogams.S @@ -1,46 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from CRYPTOGAMS. */ -#include "arm_arch.h" +#include <linux/linkage.h> .text - - - .align 5 .Lsigma: .quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral .Lone: .long 1,0,0,0 -.LOPENSSL_armcap_P: -#ifdef __ILP32__ -.long OPENSSL_armcap_P-. -#else -.quad OPENSSL_armcap_P-. -#endif -.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 2 -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function .align 5 -ChaCha20_ctr32: +ENTRY(chacha20_arm) cbz x2,.Labort - adr x5,.LOPENSSL_armcap_P - cmp x2,#192 - b.lo .Lshort -#ifdef __ILP32__ - ldrsw x6,[x5] -#else - ldr x6,[x5] -#endif - ldr w17,[x6,x5] - tst w17,#ARMV7_NEON - b.ne ChaCha20_neon -.Lshort: stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -56,7 +34,7 @@ ChaCha20_ctr32: ldp x24,x25,[x3] // load key ldp x26,x27,[x3,#16] ldp x28,x30,[x4] // load counter -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ ror x24,x24,#32 ror x25,x25,#32 ror x26,x26,#32 @@ -217,7 +195,7 @@ ChaCha20_ctr32: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -273,7 +251,7 @@ ChaCha20_ctr32: add x15,x15,x16,lsl#32 add x17,x17,x19,lsl#32 add x20,x20,x21,lsl#32 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -309,11 +287,13 @@ ChaCha20_ctr32: ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 ret -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +ENDPROC(chacha20_arm) -.type ChaCha20_neon,%function +#ifdef CONFIG_KERNEL_MODE_NEON .align 5 -ChaCha20_neon: +ENTRY(chacha20_neon) + cbz x2,.Labort_neon + stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -336,7 +316,7 @@ ChaCha20_neon: ldp x28,x30,[x4] // load counter ld1 {v27.4s},[x4] ld1 {v31.4s},[x5] -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev64 v24.4s,v24.4s ror x24,x24,#32 ror x25,x25,#32 @@ -634,7 +614,7 @@ ChaCha20_neon: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -713,7 +693,7 @@ ChaCha20_neon: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -803,19 +783,6 @@ ChaCha20_neon: ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 ret -.size ChaCha20_neon,.-ChaCha20_neon -.type ChaCha20_512_neon,%function -.align 5 -ChaCha20_512_neon: - stp x29,x30,[sp,#-96]! - add x29,sp,#0 - - adr x5,.Lsigma - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] .L512_or_more_neon: sub sp,sp,#128+64 @@ -828,7 +795,7 @@ ChaCha20_512_neon: ldp x28,x30,[x4] // load counter ld1 {v27.4s},[x4] ld1 {v31.4s},[x5] -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev64 v24.4s,v24.4s ror x24,x24,#32 ror x25,x25,#32 @@ -1341,7 +1308,7 @@ ChaCha20_512_neon: add x20,x20,x21,lsl#32 ldp x19,x21,[x1,#48] add x1,x1,#64 -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -1855,7 +1822,7 @@ ChaCha20_512_neon: add x1,x1,#64 add v21.4s,v21.4s,v25.4s -#ifdef __ARMEB__ +#ifdef __AARCH64EB__ rev x5,x5 rev x7,x7 rev x9,x9 @@ -1969,5 +1936,7 @@ ChaCha20_512_neon: ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 +.Labort_neon: ret -.size ChaCha20_512_neon,.-ChaCha20_512_neon +ENDPROC(chacha20_neon) +#endif -- 2.19.0
next prev parent reply other threads:[~2018-10-06 2:56 UTC|newest] Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-10-06 2:56 [PATCH net-next v7 00/28] WireGuard: Secure Network Tunnel Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 01/28] ARM: makefile: use ARMv3M mode for RiscPC Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 02/28] asm: simd context helper API Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 03/28] zinc: introduce minimal cryptography library Jason A. Donenfeld 2018-10-08 23:22 ` Eric Biggers 2018-10-06 2:56 ` [PATCH net-next v7 04/28] zinc: ChaCha20 generic C implementation and selftest Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 05/28] zinc: import Andy Polyakov's ChaCha20 x86_64 implementation Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 06/28] zinc: " Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 07/28] zinc: import Andy Polyakov's ChaCha20 ARM and ARM64 implementations Jason A. Donenfeld 2018-10-06 2:56 ` Jason A. Donenfeld 2018-10-06 2:56 ` Jason A. Donenfeld [this message] 2018-10-06 2:56 ` [PATCH net-next v7 08/28] zinc: port " Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 09/28] zinc: " Jason A. Donenfeld 2018-10-06 2:56 ` Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 10/28] zinc: ChaCha20 MIPS32r2 implementation Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 11/28] zinc: Poly1305 generic C implementations and selftest Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 12/28] zinc: import Andy Polyakov's Poly1305 x86_64 implementation Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 13/28] zinc: " Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 14/28] zinc: import Andy Polyakov's Poly1305 ARM and ARM64 implementations Jason A. Donenfeld 2018-10-06 2:56 ` Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 15/28] zinc: " Jason A. Donenfeld 2018-10-06 2:56 ` Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 16/28] zinc: import Andy Polyakov's Poly1305 MIPS64 implementation Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 17/28] zinc: Poly1305 MIPS32r2 and MIPS64 implementations Jason A. Donenfeld 2018-10-06 2:56 ` [PATCH net-next v7 18/28] zinc: ChaCha20Poly1305 construction and selftest Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 19/28] zinc: BLAKE2s generic C implementation " Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 20/28] zinc: BLAKE2s x86_64 implementation Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 21/28] zinc: Curve25519 generic C implementations and selftest Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 22/28] zinc: Curve25519 x86_64 implementation Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 23/28] zinc: import Bernstein and Schwabe's Curve25519 ARM implementation Jason A. Donenfeld 2018-10-06 2:57 ` Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 24/28] zinc: " Jason A. Donenfeld 2018-10-06 2:57 ` Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 25/28] crypto: port Poly1305 to Zinc Jason A. Donenfeld 2018-10-08 23:21 ` Eric Biggers 2018-10-09 0:02 ` Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 26/28] crypto: port ChaCha20 " Jason A. Donenfeld 2018-10-06 13:07 ` Martin Willi 2018-10-06 13:07 ` Martin Willi 2018-10-06 2:57 ` [PATCH net-next v7 27/28] security/keys: rewrite big_key crypto to use Zinc Jason A. Donenfeld 2018-10-06 2:57 ` [PATCH net-next v7 28/28] net: WireGuard secure network tunnel Jason A. Donenfeld 2018-10-06 6:58 ` Jiri Pirko 2018-10-06 12:25 ` Jason A. Donenfeld 2018-10-07 17:26 ` Jason A. Donenfeld 2018-10-07 18:14 ` Lukas Wunner 2018-10-07 18:42 ` Andrew Lunn 2018-10-10 9:13 ` Jiri Pirko 2018-10-10 20:27 ` Jason A. Donenfeld 2018-10-11 5:36 ` Jiri Pirko 2018-10-06 19:43 ` Eugene Syromiatnikov 2018-10-08 1:06 ` Jason A. Donenfeld 2018-10-06 23:54 ` Andrew Lunn 2018-10-07 1:57 ` Jason A. Donenfeld 2018-10-07 16:48 ` Andrew Lunn 2018-10-07 16:55 ` Jason A. Donenfeld
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20181006025709.4019-9-Jason@zx2c4.com \ --to=jason@zx2c4.com \ --cc=akpm@linux-foundation.org \ --cc=davem@davemloft.net \ --cc=gregkh@linuxfoundation.org \ --cc=jeanphilippe.aumasson@gmail.com \ --cc=kernel-hardening@lists.openwall.com \ --cc=linux-arm-kernel@lists.infradead.org \ --cc=linux-crypto@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux@armlinux.org.uk \ --cc=luto@kernel.org \ --cc=netdev@vger.kernel.org \ --cc=sneves@dei.uc.pt \ --cc=torvalds@linux-foundation.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.