From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S272272AbTHIHp3 (ORCPT ); Sat, 9 Aug 2003 03:45:29 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S272274AbTHIHp3 (ORCPT ); Sat, 9 Aug 2003 03:45:29 -0400 Received: from waste.org ([209.173.204.2]:2223 "EHLO waste.org") by vger.kernel.org with ESMTP id S272272AbTHIHpM (ORCPT ); Sat, 9 Aug 2003 03:45:12 -0400 Date: Sat, 9 Aug 2003 02:44:59 -0500 From: Matt Mackall To: linux-kernel Cc: Andrew Morton , jmorris@intercode.com.au, davem@redhat.com Subject: [RFC][PATCH] Make cryptoapi non-optional? Message-ID: <20030809074459.GQ31810@waste.org> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.3.28i Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org The attached (lightly tested) patch gets rid of the SHA in the /dev/random code and replaces it with cryptoapi, leaving us with just one SHA implementation. It also updates syncookies. This code is already at about 125% of baseline throughput, and can probably reach 250% with some tweaking of cryptoapi's redundant padding (in case anyone else cares about being able to get 120Mb/s of cryptographically strong random data). The potentially controversial part is that the random driver is currently non-optional, and this patch would make cryptoapi non-optional as well. I also plan to cryptoapi-ify the outstanding MD5 instance. 
diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c --- orig/drivers/char/random.c 2003-08-08 11:14:15.000000000 -0500 +++ work/drivers/char/random.c 2003-08-08 16:40:30.000000000 -0500 @@ -249,11 +249,13 @@ #include #include #include +#include #include #include #include #include +#include /* * Configuration information @@ -772,122 +774,6 @@ add_timer_randomness(disk->random, 0x100+MKDEV(disk->major, disk->first_minor)); } -/****************************************************************** - * - * Hash function definition - * - *******************************************************************/ - -/* - * This chunk of code defines a function - * void SHATransform(__u32 digest[HASH_BUFFER_SIZE + HASH_EXTRA_SIZE], - * __u32 const data[16]) - * - * The function hashes the input data to produce a digest in the first - * HASH_BUFFER_SIZE words of the digest[] array, and uses HASH_EXTRA_SIZE - * more words for internal purposes. (This buffer is exported so the - * caller can wipe it once rather than this code doing it each call, - * and tacking it onto the end of the digest[] array is the quick and - * dirty way of doing it.) - * - * For /dev/random purposes, the length of the data being hashed is - * fixed in length, so appending a bit count in the usual way is not - * cryptographically necessary. - */ - -#define HASH_BUFFER_SIZE 5 -#define HASH_EXTRA_SIZE 80 - -/* - * SHA transform algorithm, taken from code written by Peter Gutmann, - * and placed in the public domain. - */ - -/* The SHA f()-functions. */ - -#define f1(x,y,z) ( z ^ (x & (y^z)) ) /* Rounds 0-19: x ? 
y : z */ -#define f2(x,y,z) (x ^ y ^ z) /* Rounds 20-39: XOR */ -#define f3(x,y,z) ( (x & y) + (z & (x ^ y)) ) /* Rounds 40-59: majority */ -#define f4(x,y,z) (x ^ y ^ z) /* Rounds 60-79: XOR */ - -/* The SHA Mysterious Constants */ - -#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ -#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ -#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ -#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ - -#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) ) - -#define subRound(a, b, c, d, e, f, k, data) \ - ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) ) - - -static void SHATransform(__u32 digest[85], __u32 const data[16]) -{ - __u32 A, B, C, D, E; /* Local vars */ - __u32 TEMP; - int i; -#define W (digest + HASH_BUFFER_SIZE) /* Expanded data array */ - - /* - * Do the preliminary expansion of 16 to 80 words. Doing it - * out-of-line line like this is faster than doing it in-line on - * register-starved machines like the x86, and not really any - * slower on real processors. - */ - memcpy(W, data, 16*sizeof(__u32)); - for (i = 0; i < 64; i++) { - TEMP = W[i] ^ W[i+2] ^ W[i+8] ^ W[i+13]; - W[i+16] = ROTL(1, TEMP); - } - - /* Set up first buffer and local data buffer */ - A = digest[ 0 ]; - B = digest[ 1 ]; - C = digest[ 2 ]; - D = digest[ 3 ]; - E = digest[ 4 ]; - - /* Heavy mangling, in 4 sub-rounds of 20 iterations each. 
*/ - for (i = 0; i < 80; i++) { - if (i < 40) { - if (i < 20) - TEMP = f1(B, C, D) + K1; - else - TEMP = f2(B, C, D) + K2; - } else { - if (i < 60) - TEMP = f3(B, C, D) + K3; - else - TEMP = f4(B, C, D) + K4; - } - TEMP += ROTL(5, A) + E + W[i]; - E = D; D = C; C = ROTL(30, B); B = A; A = TEMP; - } - - /* Build message digest */ - digest[ 0 ] += A; - digest[ 1 ] += B; - digest[ 2 ] += C; - digest[ 3 ] += D; - digest[ 4 ] += E; - - /* W is wiped by the caller */ -#undef W -} - -#undef ROTL -#undef f1 -#undef f2 -#undef f3 -#undef f4 -#undef K1 -#undef K2 -#undef K3 -#undef K4 -#undef subRound - /********************************************************************* * * Entropy extraction routines @@ -896,8 +782,6 @@ #define EXTRACT_ENTROPY_USER 1 #define EXTRACT_ENTROPY_LIMIT 2 -#define TMP_BUF_SIZE (HASH_BUFFER_SIZE + HASH_EXTRA_SIZE) -#define SEC_XFER_SIZE (TMP_BUF_SIZE*4) static ssize_t extract_entropy(struct entropy_store *r, void * buf, size_t nbytes, int flags); @@ -909,7 +793,7 @@ */ static void reseed_pool(struct entropy_store *r, int margin, int wanted) { - __u32 tmp[TMP_BUF_SIZE]; + __u32 tmp[32]; /* 256 bits */ int bytes; DEBUG_ENT("reseed %s wants %d bits (margin %d)\n", @@ -944,14 +828,11 @@ static ssize_t extract_entropy(struct entropy_store *r, void * buf, size_t nbytes, int flags) { - ssize_t ret, i; - __u32 tmp[TMP_BUF_SIZE]; - __u32 x; + ssize_t ret, i, x; unsigned long cpuflags; - - /* Redundant, but just in case... */ - if (r->entropy_count > r->poolinfo->POOLBITS) - r->entropy_count = r->poolinfo->POOLBITS; + struct crypto_tfm *tfm; + struct scatterlist sg[1]; + __u32 hash[5]; /* 160 bits */ /* Hold lock while accounting */ spin_lock_irqsave(&r->lock, cpuflags); @@ -975,6 +856,9 @@ spin_unlock_irqrestore(&r->lock, cpuflags); ret = 0; + + tfm = crypto_alloc_tfm("sha1", 0); + while (nbytes) { /* * Check if we need to break out or reschedule.... 
@@ -987,19 +871,10 @@ } DEBUG_ENT("extract sleep (%d bytes left)\n", nbytes); - schedule(); - DEBUG_ENT("extract wake\n"); } - /* Hash the pool to get the output */ - tmp[0] = 0x67452301; - tmp[1] = 0xefcdab89; - tmp[2] = 0x98badcfe; - tmp[3] = 0x10325476; - tmp[4] = 0xc3d2e1f0; - /* * As we hash the pool, we mix intermediate values of * the hash back into the pool. This eliminates @@ -1008,40 +883,40 @@ * attempts to find previous ouputs), unless the hash * function can be inverted. */ + for (i = 0, x = 0; i < r->poolinfo->poolwords; i += 16, x+=2) { - SHATransform(tmp, r->pool+i); - add_entropy_words(r, &tmp[x%HASH_BUFFER_SIZE], 1); + sg[0].page = virt_to_page(r->pool+i); + sg[0].offset = ((long)(r->pool+i) & ~PAGE_MASK); + sg[0].length = 64; + crypto_digest_digest(tfm, sg, 1, (char *)hash); + add_entropy_words(r, &hash[x%20], 1); } /* * In case the hash function has some recognizable * output pattern, we fold it in half. */ - for (i = 0; i < HASH_BUFFER_SIZE/2; i++) - tmp[i] ^= tmp[i + (HASH_BUFFER_SIZE+1)/2]; -#if HASH_BUFFER_SIZE & 1 /* There's a middle word to deal with */ - x = tmp[HASH_BUFFER_SIZE/2]; - x ^= (x >> 16); /* Fold it in half */ - ((__u16 *)tmp)[HASH_BUFFER_SIZE-1] = (__u16)x; -#endif + hash[0] ^= hash[3]; + hash[1] ^= hash[4]; /* Copy data to destination buffer */ - i = min(nbytes, HASH_BUFFER_SIZE*sizeof(__u32)/2); + i = min(nbytes, sizeof(hash)/2); if (flags & EXTRACT_ENTROPY_USER) { - i -= copy_to_user(buf, (__u8 const *)tmp, i); + i -= copy_to_user(buf, (__u8 const *)hash, i); if (!i) { ret = -EFAULT; break; } } else - memcpy(buf, (__u8 const *)tmp, i); + memcpy(buf, (__u8 const *)hash, i); nbytes -= i; buf += i; ret += i; } /* Wipe data just returned from memory */ - memset(tmp, 0, sizeof(tmp)); + memset(hash, 0, sizeof(hash)); + crypto_free_tfm(tfm); return ret; } @@ -1160,15 +1035,10 @@ static ssize_t random_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos) { - ssize_t n, retval = 0, count = 0; + ssize_t n, retval = 0, 
count = 0; - if (nbytes == 0) - return 0; - while (nbytes > 0) { - n = nbytes; - if (n > SEC_XFER_SIZE) - n = SEC_XFER_SIZE; + n = min_t(size_t, nbytes, BLOCKING_POOL_SIZE/8); /* We can take all the entropy in the input pool */ reseed_pool(blocking_pool, 0, n); @@ -1846,13 +1716,16 @@ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) static int syncookie_init; -static __u32 syncookie_secret[2][16-3+HASH_BUFFER_SIZE]; +static __u32 syncookie_secret[2][16-3]; __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport, __u32 sseq, __u32 count, __u32 data) { - __u32 tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE]; - __u32 seq; + __u32 tmp[16]; /* 512 bits */ + __u32 hash[5]; /* 160 bits */ + __u32 seq; + struct crypto_tfm *tfm; + struct scatterlist sg[1]; /* * Pick two random secrets the first time we need a cookie. @@ -1873,22 +1746,27 @@ * MSS into the second hash value. */ - memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0])); + sg[0].page = virt_to_page(tmp); + sg[0].offset = ((long) tmp & ~PAGE_MASK); + sg[0].length = sizeof(tmp); + tmp[0]=saddr; tmp[1]=daddr; tmp[2]=(sport << 16) + dport; - SHATransform(tmp+16, tmp); - seq = tmp[17] + sseq + (count << COOKIEBITS); + + memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0])); + tfm = crypto_alloc_tfm("sha1", 0); + crypto_digest_digest(tfm, sg, 1, (char *)hash); + + seq = hash[1] + sseq + (count << COOKIEBITS); memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1])); - tmp[0]=saddr; - tmp[1]=daddr; - tmp[2]=(sport << 16) + dport; tmp[3] = count; /* minute counter */ - SHATransform(tmp+16, tmp); + crypto_digest_digest(tfm, sg, 1, (char *)hash); + crypto_free_tfm(tfm); /* Add in the second hash and the data */ - return seq + ((tmp[17] + data) & COOKIEMASK); + return seq + ((hash[1] + data) & COOKIEMASK); } /* @@ -1903,19 +1781,29 @@ __u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport, __u32 sseq, __u32 count, __u32 maxdiff) { - __u32 
tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE]; - __u32 diff; + __u32 tmp[16]; /* 512 bits */ + __u32 hash[5]; /* 160 bits */ + __u32 diff; + struct crypto_tfm *tfm; + struct scatterlist sg[1]; if (syncookie_init == 0) return (__u32)-1; /* Well, duh! */ - /* Strip away the layers from the cookie */ - memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0])); + sg[0].page = virt_to_page(tmp); + sg[0].offset = ((long) tmp & ~PAGE_MASK); + sg[0].length = sizeof(tmp); + tmp[0]=saddr; tmp[1]=daddr; tmp[2]=(sport << 16) + dport; - SHATransform(tmp+16, tmp); - cookie -= tmp[17] + sseq; + + /* Strip away the layers from the cookie */ + memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0])); + tfm = crypto_alloc_tfm("sha1", 0); + crypto_digest_digest(tfm, sg, 1, (char *)hash); + + cookie -= hash[1] + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ diff = (count - (cookie >> COOKIEBITS)) & ((__u32)-1 >> COOKIEBITS); @@ -1923,13 +1811,11 @@ return (__u32)-1; memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1])); - tmp[0] = saddr; - tmp[1] = daddr; - tmp[2] = (sport << 16) + dport; tmp[3] = count - diff; /* minute counter */ - SHATransform(tmp+16, tmp); + crypto_digest_digest(tfm, sg, 1, (char *)hash); + crypto_free_tfm(tfm); - return (cookie - tmp[17]) & COOKIEMASK; /* Leaving the data behind */ + return (cookie - hash[1]) & COOKIEMASK; /* Leaving the data behind */ } #endif -- Matt Mackall : http://www.selenic.com : of or relating to the moon