linux-crypto.vger.kernel.org archive mirror
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	David Miller <davem@davemloft.net>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	Samuel Neves <sneves@dei.uc.pt>, Arnd Bergmann <arnd@arndb.de>,
	Eric Biggers <ebiggers@google.com>,
	Andy Lutomirski <luto@kernel.org>,
	Martin Willi <martin@strongswan.org>,
	Rene van Dorst <opensource@vdorst.com>,
	David Sterba <dsterba@suse.com>
Subject: [PATCH v5 34/34] crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine
Date: Fri,  8 Nov 2019 13:22:40 +0100	[thread overview]
Message-ID: <20191108122240.28479-35-ardb@kernel.org> (raw)
In-Reply-To: <20191108122240.28479-1-ardb@kernel.org>

Reimplement the library routines to perform chacha20poly1305 en/decryption
on scatterlists, without [ab]using the [deprecated] blkcipher interface,
which is rather heavyweight and does things we don't really need.
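
For context, the intended calling convention is the one exercised by the
selftest below: the caller maps a single buffer holding the message followed
by room for the tag, and passes the message length to encrypt but the full
message-plus-tag length to decrypt. A minimal caller sketch (not part of the
patch; buf, len, ad, ad_len, nonce and key are made-up caller-side names):

    struct scatterlist sg;

    /* encrypt: buf holds len bytes of plaintext followed by
     * POLY1305_DIGEST_SIZE bytes of space for the tag */
    sg_init_one(&sg, buf, len + POLY1305_DIGEST_SIZE);
    chacha20poly1305_encrypt_sg_inplace(&sg, len, ad, ad_len, nonce, key);

    /* decrypt: the length covers the tag as well; the call returns false
     * if the tag does not authenticate */
    if (!chacha20poly1305_decrypt_sg_inplace(&sg, len + POLY1305_DIGEST_SIZE,
                                             ad, ad_len, nonce, key))
        return -EBADMSG;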

Instead, we use the sg_miter API in a novel and clever way, to iterate
over the scatterlist in-place (i.e., source == destination, which is the
only way this library is expected to be used). That way, we don't have to
iterate over two scatterlists in parallel.
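
The sg_miter usage boils down to the pattern below (a simplified sketch of
the loop in the diff, not additional code; process() is a stand-in for the
actual ChaCha/Poly1305 work on each chunk):

    struct sg_mapping_iter miter;
    unsigned int flags = SG_MITER_TO_SG;    /* write changes back to the SG list */

    if (!preemptible())
        flags |= SG_MITER_ATOMIC;           /* atomic kmap when we may not sleep */

    sg_miter_start(&miter, src, sg_nents(src), flags);
    while (sg_miter_next(&miter)) {
        /* miter.addr/miter.length describe one contiguous mapped chunk;
         * since source == destination it is transformed in place */
        process(miter.addr, miter.length);
    }
    sg_miter_stop(&miter);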

Another optimization is that, instead of relying on the blkcipher walker
to present the input in suitable chunks, we recognize that ChaCha is a
stream cipher, and so we can simply deal with partial blocks by keeping a
block of cipherstream on the stack and using crypto_xor() to mix it with
the in/output.
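
Concretely, each mapped chunk is handled roughly as follows (simplified from
the function in the diff; 'partial' counts leftover keystream bytes that are
consumed first when the next chunk is processed):

    /* whole blocks are en/decrypted directly in place */
    whole = round_down(length, CHACHA_BLOCK_SIZE);
    chacha_crypt(chacha_state, addr, addr, whole, 20);

    /* for a trailing partial block, generate one block of keystream into
     * the stack buffer and XOR in only the bytes that are needed */
    tail = length - whole;
    if (tail) {
        chacha_crypt(chacha_state, b.chacha_stream, pad0, CHACHA_BLOCK_SIZE, 20);
        crypto_xor(addr + whole, b.chacha_stream, tail);
        partial = tail;
    }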

Finally, we omit the scatterwalk_map_and_copy() call if the last element of
the scatterlist covers the MAC as well (which is the common case),
avoiding the need to walk the scatterlist and kmap() the page twice.
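
In other words, the tail handling has a fast path and a slow path; a
simplified sketch of the encrypt side of the function below (sl is the walk
counter, which ends up <= 0 once the loop has covered src_len, so -sl is the
slack left in the last, still mapped, chunk):

    if (sl <= -POLY1305_DIGEST_SIZE)
        /* common case: the tag area lies inside the mapped chunk, right
         * after the ciphertext, so it is written (or, on the decrypt side,
         * compared) in place */
        poly1305_final(&poly1305_state, miter.addr + miter.length + sl);

    sg_miter_stop(&miter);

    if (sl > -POLY1305_DIGEST_SIZE) {
        /* otherwise the tag starts in a later scatterlist element, so fall
         * back to a single scatterwalk_map_and_copy() pass once the
         * mapping has been dropped */
        poly1305_final(&poly1305_state, b.mac[1]);
        scatterwalk_map_and_copy(b.mac[1], src, src_len,
                                 POLY1305_DIGEST_SIZE, 1);
    }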

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/crypto/chacha20poly1305.h      |  11 ++
 lib/crypto/chacha20poly1305-selftest.c |  45 ++++++
 lib/crypto/chacha20poly1305.c          | 150 ++++++++++++++++++++
 3 files changed, 206 insertions(+)

diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
index ad3b1de58df8..234ee28078ef 100644
--- a/include/crypto/chacha20poly1305.h
+++ b/include/crypto/chacha20poly1305.h
@@ -7,6 +7,7 @@
 #define __CHACHA20POLY1305_H
 
 #include <linux/types.h>
+#include <linux/scatterlist.h>
 
 enum chacha20poly1305_lengths {
 	XCHACHA20POLY1305_NONCE_SIZE = 24,
@@ -34,4 +35,14 @@ bool __must_check xchacha20poly1305_decrypt(
 	const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
 	const u8 key[CHACHA20POLY1305_KEY_SIZE]);
 
+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+					 const u8 *ad, const size_t ad_len,
+					 const u64 nonce,
+					 const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+					 const u8 *ad, const size_t ad_len,
+					 const u64 nonce,
+					 const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
 #endif /* __CHACHA20POLY1305_H */
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index d1ed0f27cfdb..465de46dbdef 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -7250,6 +7250,7 @@ bool __init chacha20poly1305_selftest(void)
 	enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
 	size_t i;
 	u8 *computed_output = NULL, *heap_src = NULL;
+	struct scatterlist sg_src;
 	bool success = true, ret;
 
 	heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
@@ -7280,6 +7281,29 @@ bool __init chacha20poly1305_selftest(void)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
+		if (chacha20poly1305_enc_vectors[i].nlen != 8)
+			continue;
+		memcpy(heap_src, chacha20poly1305_enc_vectors[i].input,
+		       chacha20poly1305_enc_vectors[i].ilen);
+		sg_init_one(&sg_src, heap_src,
+			    chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE);
+		chacha20poly1305_encrypt_sg_inplace(&sg_src,
+			chacha20poly1305_enc_vectors[i].ilen,
+			chacha20poly1305_enc_vectors[i].assoc,
+			chacha20poly1305_enc_vectors[i].alen,
+			get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
+			chacha20poly1305_enc_vectors[i].key);
+		if (memcmp(heap_src,
+				   chacha20poly1305_enc_vectors[i].output,
+				   chacha20poly1305_enc_vectors[i].ilen +
+							POLY1305_DIGEST_SIZE)) {
+			pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n",
+			       i + 1);
+			success = false;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
 		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
 		ret = chacha20poly1305_decrypt(computed_output,
@@ -7301,6 +7325,27 @@ bool __init chacha20poly1305_selftest(void)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
+		memcpy(heap_src, chacha20poly1305_dec_vectors[i].input,
+		       chacha20poly1305_dec_vectors[i].ilen);
+		sg_init_one(&sg_src, heap_src,
+			    chacha20poly1305_dec_vectors[i].ilen);
+		ret = chacha20poly1305_decrypt_sg_inplace(&sg_src,
+			chacha20poly1305_dec_vectors[i].ilen,
+			chacha20poly1305_dec_vectors[i].assoc,
+			chacha20poly1305_dec_vectors[i].alen,
+			get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
+			chacha20poly1305_dec_vectors[i].key);
+		if (!decryption_success(ret,
+			chacha20poly1305_dec_vectors[i].failure,
+			memcmp(heap_src, chacha20poly1305_dec_vectors[i].output,
+			       chacha20poly1305_dec_vectors[i].ilen -
+							POLY1305_DIGEST_SIZE))) {
+			pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
+			       i + 1);
+			success = false;
+		}
+	}
 
 	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
 		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index c12ddbe9eb92..821e5cc9b14e 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -11,6 +11,7 @@
 #include <crypto/chacha20poly1305.h>
 #include <crypto/chacha.h>
 #include <crypto/poly1305.h>
+#include <crypto/scatterwalk.h>
 
 #include <asm/unaligned.h>
 #include <linux/kernel.h>
@@ -205,6 +206,155 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
 }
 EXPORT_SYMBOL(xchacha20poly1305_decrypt);
 
+static
+bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
+				       const size_t src_len,
+				       const u8 *ad, const size_t ad_len,
+				       const u64 nonce,
+				       const u8 key[CHACHA20POLY1305_KEY_SIZE],
+				       int encrypt)
+{
+	const u8 *pad0 = page_address(ZERO_PAGE(0));
+	struct poly1305_desc_ctx poly1305_state;
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	struct sg_mapping_iter miter;
+	size_t partial = 0;
+	unsigned int flags;
+	bool ret = true;
+	int sl;
+	union {
+		struct {
+			u32 k[CHACHA_KEY_WORDS];
+			__le64 iv[2];
+		};
+		u8 block0[POLY1305_KEY_SIZE];
+		u8 chacha_stream[CHACHA_BLOCK_SIZE];
+		struct {
+			u8 mac[2][POLY1305_DIGEST_SIZE];
+		};
+		__le64 lens[2];
+	} b __aligned(16);
+
+	chacha_load_key(b.k, key);
+
+	b.iv[0] = 0;
+	b.iv[1] = cpu_to_le64(nonce);
+
+	chacha_init(chacha_state, b.k, (u8 *)b.iv);
+	chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
+	poly1305_init(&poly1305_state, b.block0);
+
+	if (unlikely(ad_len)) {
+		poly1305_update(&poly1305_state, ad, ad_len);
+		if (ad_len & 0xf)
+			poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
+	}
+
+	flags = SG_MITER_TO_SG;
+	if (!preemptible())
+		flags |= SG_MITER_ATOMIC;
+
+	sg_miter_start(&miter, src, sg_nents(src), flags);
+
+	for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
+		u8 *addr = miter.addr;
+		size_t length = min_t(size_t, sl, miter.length);
+
+		if (!encrypt)
+			poly1305_update(&poly1305_state, addr, length);
+
+		if (unlikely(partial)) {
+			size_t l = min(length, CHACHA_BLOCK_SIZE - partial);
+
+			crypto_xor(addr, b.chacha_stream + partial, l);
+			partial = (partial + l) & (CHACHA_BLOCK_SIZE - 1);
+
+			addr += l;
+			length -= l;
+		}
+
+		if (likely(length >= CHACHA_BLOCK_SIZE || length == sl)) {
+			size_t l = length;
+
+			if (unlikely(length < sl))
+				l &= ~(CHACHA_BLOCK_SIZE - 1);
+			chacha_crypt(chacha_state, addr, addr, l, 20);
+			addr += l;
+			length -= l;
+		}
+
+		if (unlikely(length > 0)) {
+			chacha_crypt(chacha_state, b.chacha_stream, pad0,
+				     CHACHA_BLOCK_SIZE, 20);
+			crypto_xor(addr, b.chacha_stream, length);
+			partial = length;
+		}
+
+		if (encrypt)
+			poly1305_update(&poly1305_state, miter.addr,
+					min_t(size_t, sl, miter.length));
+	}
+
+	if (src_len & 0xf)
+		poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf));
+
+	b.lens[0] = cpu_to_le64(ad_len);
+	b.lens[1] = cpu_to_le64(src_len);
+	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
+
+	if (likely(sl <= -POLY1305_DIGEST_SIZE)) {
+		if (encrypt) {
+			poly1305_final(&poly1305_state,
+				       miter.addr + miter.length + sl);
+			ret = true;
+		} else {
+			poly1305_final(&poly1305_state, b.mac[0]);
+			ret = !crypto_memneq(b.mac[0],
+					     miter.addr + miter.length + sl,
+					     POLY1305_DIGEST_SIZE);
+		}
+	}
+
+	sg_miter_stop(&miter);
+
+	if (unlikely(sl > -POLY1305_DIGEST_SIZE)) {
+		poly1305_final(&poly1305_state, b.mac[1]);
+		scatterwalk_map_and_copy(b.mac[encrypt], src, src_len,
+					 sizeof(b.mac[1]), encrypt);
+		ret = encrypt ||
+		      !crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE);
+	}
+
+	memzero_explicit(chacha_state, sizeof(chacha_state));
+	memzero_explicit(&b, sizeof(b));
+
+	return ret;
+}
+
+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+					 const u8 *ad, const size_t ad_len,
+					 const u64 nonce,
+					 const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+	return chacha20poly1305_crypt_sg_inplace(src, src_len, ad, ad_len,
+						 nonce, key, 1);
+}
+EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace);
+
+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
+					 const u8 *ad, const size_t ad_len,
+					 const u64 nonce,
+					 const u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+	if (unlikely(src_len < POLY1305_DIGEST_SIZE))
+		return false;
+
+	return chacha20poly1305_crypt_sg_inplace(src,
+						 src_len - POLY1305_DIGEST_SIZE,
+						 ad, ad_len, nonce, key, 0);
+}
+EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
+
 static int __init mod_init(void)
 {
 	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
-- 
2.20.1


Thread overview: 43+ messages
2019-11-08 12:22 [PATCH v5 00/34] crypto: crypto API library interfaces for WireGuard Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 01/34] crypto: tidy up lib/crypto Kconfig and Makefile Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 02/34] crypto: chacha - move existing library code into lib/crypto Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 03/34] crypto: x86/chacha - depend on generic chacha library instead of crypto driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 04/34] crypto: x86/chacha - expose SIMD ChaCha routine as library function Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 05/34] crypto: arm64/chacha - depend on generic chacha library instead of crypto driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 06/34] crypto: arm64/chacha - expose arm64 ChaCha routine as library function Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 07/34] crypto: arm/chacha - import Eric Biggers's scalar accelerated ChaCha code Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 08/34] crypto: arm/chacha - remove dependency on generic ChaCha driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 09/34] crypto: arm/chacha - expose ARM ChaCha routine as library function Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 10/34] crypto: mips/chacha - import 32r2 ChaCha code from Zinc Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 11/34] crypto: mips/chacha - wire up accelerated 32r2 " Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 12/34] crypto: chacha - unexport chacha_generic routines Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 13/34] crypto: poly1305 - move core routines into a separate library Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 14/34] crypto: x86/poly1305 - unify Poly1305 state struct with generic code Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 15/34] crypto: poly1305 - expose init/update/final library interface Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 16/34] crypto: x86/poly1305 - depend on generic library not generic shash Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 17/34] crypto: x86/poly1305 - expose existing driver as poly1305 library Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 18/34] crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 19/34] crypto: arm/poly1305 " Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 20/34] crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 21/34] int128: move __uint128_t compiler test to Kconfig Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 22/34] crypto: BLAKE2s - generic C library implementation and selftest Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 23/34] crypto: testmgr - add test cases for Blake2s Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 24/34] crypto: blake2s - implement generic shash driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 25/34] crypto: BLAKE2s - x86_64 SIMD implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 26/34] crypto: Curve25519 - generic C library implementations Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 27/34] crypto: curve25519 - add kpp selftest Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 28/34] crypto: curve25519 - implement generic KPP driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 29/34] crypto: lib/curve25519 - work around Clang stack spilling issue Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 30/34] crypto: Curve25519 - x86_64 library and KPP implementations Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 31/34] crypto: arm - import Bernstein and Schwabe's Curve25519 ARM implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 32/34] crypto: arm/Curve25519 - wire up NEON implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 33/34] crypto: chacha20poly1305 - import construction and selftest from Zinc Ard Biesheuvel
2019-11-08 12:22 ` Ard Biesheuvel [this message]
2019-11-15  6:07 ` [PATCH v5 00/34] crypto: crypto API library interfaces for WireGuard Herbert Xu
     [not found]   ` <CAHmME9oOfhv6RN00m1c6c5qELC5dzFKS=mgDBQ-stVEWu00p_A@mail.gmail.com>
2019-11-15  9:09     ` Herbert Xu
2019-11-19 15:18       ` Jason A. Donenfeld
2019-11-19 15:34         ` Ard Biesheuvel
2019-11-19 15:44           ` Jason A. Donenfeld
2019-11-19 15:59             ` Ard Biesheuvel
2019-11-19 16:23             ` Eric Biggers
2019-11-19 21:43               ` Jordan Glover
