All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] crypto: cast5: simplify if-statements
@ 2010-10-27 10:03 Nicolas Kaiser
  2010-11-04 15:14 ` Nicolas Kaiser
  2010-11-04 18:59   ` Herbert Xu
  0 siblings, 2 replies; 4+ messages in thread
From: Nicolas Kaiser @ 2010-10-27 10:03 UTC (permalink / raw)
  To: Herbert Xu
  Cc: David S. Miller, Kartikey Mahendra Bhatt, linux-crypto, linux-kernel

I noticed that by factoring out common rounds from the
branches of the if-statements in the encryption and
decryption functions, the executable file size goes down
significantly, for crypto/cast5.ko from 26688 bytes
to 24336 bytes (amd64).

On my test system, I saw a slight speedup. This is the
first time I have done such a benchmark - I modelled it on a
similar one from the crypto mailing list, so I hope I did it right.

Before:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben: 
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,43484 s, 21,5 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,4089 s, 21,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,41091 s, 21,7 MB/s

After:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben: 
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,38128 s, 22,0 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,29486 s, 22,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,37162 s, 22,1 MB/s

Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
---
 crypto/cast5.c |   74 ++++++++++++++++++-------------------------------------
 1 files changed, 24 insertions(+), 50 deletions(-)

diff --git a/crypto/cast5.c b/crypto/cast5.c
index a1d2294..4a230dd 100644
--- a/crypto/cast5.c
+++ b/crypto/cast5.c
@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 	 * Rounds 3, 6, 9, 12, and 15 use f function Type 3.
 	 */
 
+	t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
+	t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
+	t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
+	t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
+	t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
+	t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
+	t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
+	t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
+	t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
+	t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
+	t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
+	t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
 	if (!(c->rr)) {
-		t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
-		t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
-		t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
-		t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
-		t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
-		t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
-		t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
-		t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
-		t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
-		t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
-		t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
-		t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
 		t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
 		t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
 		t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
 		t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
-	} else {
-		t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
-		t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
-		t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
-		t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
-		t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
-		t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
-		t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
-		t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
-		t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
-		t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
-		t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
-		t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
 	}
 
 	/* c1...c64 <-- (R16,L16).  (Exchange final blocks L16, R16 and
@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 		t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
 		t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
 		t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
-		t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
-		t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
-		t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
-		t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
-		t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
-		t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
-		t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
-		t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
-		t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
-		t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
-		t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
-		t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
-	} else {
-		t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
-		t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
-		t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
-		t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
-		t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
-		t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
-		t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
-		t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
-		t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
-		t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
-		t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
-		t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
 	}
+	t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
+	t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
+	t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
+	t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
+	t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
+	t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
+	t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
+	t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
+	t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
+	t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
+	t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
+	t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
 
 	dst[0] = cpu_to_be32(r);
 	dst[1] = cpu_to_be32(l);
-- 
1.7.2.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] crypto: cast5: simplify if-statements
  2010-10-27 10:03 [PATCH] crypto: cast5: simplify if-statements Nicolas Kaiser
@ 2010-11-04 15:14 ` Nicolas Kaiser
  2010-11-04 18:59   ` Herbert Xu
  1 sibling, 0 replies; 4+ messages in thread
From: Nicolas Kaiser @ 2010-11-04 15:14 UTC (permalink / raw)
  To: linux-crypto

* Nicolas Kaiser <nikai@nikai.net>:
> I noticed that by factoring out common rounds from the
> branches of the if-statements in the encryption and
> decryption functions, the executable file size goes down
> significantly, for crypto/cast5.ko from 26688 bytes
> to 24336 bytes (amd64).
> 
> On my test system, I saw a slight speedup.

Hi there!

Would something like this be the right way to add a simple cast5 cipher speed test to tcrypt.c?
	case 14:
		test_cipher_speed("ecb(cast5)", ENCRYPT, sec, NULL, 0,
				  speed_template_8);
		test_cipher_speed("ecb(cast5)", DECRYPT, sec, NULL, 0,
				  speed_template_8);
		test_cipher_speed("cbc(cast5)", ENCRYPT, sec, NULL, 0,
				  speed_template_8);
		test_cipher_speed("cbc(cast5)", DECRYPT, sec, NULL, 0,
				  speed_template_8);
		break;

Does that make sense? Here are my results:
# insmod crypto/tcrypt.ko mode=14 sec=30

Before:
testing speed of ecb(cast5) encryption
test 0 (64 bit key, 16 byte blocks): 118241789 operations in 30 seconds (1891868624 bytes)
test 1 (64 bit key, 64 byte blocks): 40502820 operations in 30 seconds (2592180480 bytes)
test 2 (64 bit key, 256 byte blocks): 11111145 operations in 30 seconds (2844453120 bytes)
test 3 (64 bit key, 1024 byte blocks): 2858271 operations in 30 seconds (2926869504 bytes)
test 4 (64 bit key, 8192 byte blocks): 359327 operations in 30 seconds (2943606784 bytes)

testing speed of ecb(cast5) decryption
test 0 (64 bit key, 16 byte blocks): 118443232 operations in 30 seconds (1895091712 bytes)
test 1 (64 bit key, 64 byte blocks): 40060571 operations in 30 seconds (2563876544 bytes)
test 2 (64 bit key, 256 byte blocks): 10944717 operations in 30 seconds (2801847552 bytes)
test 3 (64 bit key, 1024 byte blocks): 2811154 operations in 30 seconds (2878621696 bytes)
test 4 (64 bit key, 8192 byte blocks): 353043 operations in 30 seconds (2892128256 bytes)

testing speed of cbc(cast5) encryption
test 0 (64 bit key, 16 byte blocks): 110614202 operations in 30 seconds (1769827232 bytes)
test 1 (64 bit key, 64 byte blocks): 37260496 operations in 30 seconds (2384671744 bytes)
test 2 (64 bit key, 256 byte blocks): 10323268 operations in 30 seconds (2642756608 bytes)
test 3 (64 bit key, 1024 byte blocks): 2650259 operations in 30 seconds (2713865216 bytes)
test 4 (64 bit key, 8192 byte blocks): 333281 operations in 30 seconds (2730237952 bytes)

testing speed of cbc(cast5) decryption
test 0 (64 bit key, 16 byte blocks): 102455488 operations in 30 seconds (1639287808 bytes)
test 1 (64 bit key, 64 byte blocks): 35507599 operations in 30 seconds (2272486336 bytes)
test 2 (64 bit key, 256 byte blocks): 9962653 operations in 30 seconds (2550439168 bytes)
test 3 (64 bit key, 1024 byte blocks): 2569557 operations in 30 seconds (2631226368 bytes)
test 4 (64 bit key, 8192 byte blocks): 323085 operations in 30 seconds (2646712320 bytes)

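In sum, that is (total bytes over the five tests / 150 seconds, for
ecb encryption, ecb decryption, cbc encryption and cbc decryption, in that order):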
87993190 bytes/s
86877105 bytes/s
81609058 bytes/s
78267680 bytes/s


After:
testing speed of ecb(cast5) encryption
test 0 (64 bit key, 16 byte blocks): 120033632 operations in 30 seconds (1920538112 bytes)
test 1 (64 bit key, 64 byte blocks): 41484473 operations in 30 seconds (2655006272 bytes)
test 2 (64 bit key, 256 byte blocks): 11418041 operations in 30 seconds (2923018496 bytes)
test 3 (64 bit key, 1024 byte blocks): 2940940 operations in 30 seconds (3011522560 bytes)
test 4 (64 bit key, 8192 byte blocks): 369831 operations in 30 seconds (3029655552 bytes)

testing speed of ecb(cast5) decryption
test 0 (64 bit key, 16 byte blocks): 122215696 operations in 30 seconds (1955451136 bytes)
test 1 (64 bit key, 64 byte blocks): 41845591 operations in 30 seconds (2678117824 bytes)
test 2 (64 bit key, 256 byte blocks): 11478322 operations in 30 seconds (2938450432 bytes)
test 3 (64 bit key, 1024 byte blocks): 2952595 operations in 30 seconds (3023457280 bytes)
test 4 (64 bit key, 8192 byte blocks): 371305 operations in 30 seconds (3041730560 bytes)

testing speed of cbc(cast5) encryption
test 0 (64 bit key, 16 byte blocks): 112301700 operations in 30 seconds (1796827200 bytes)
test 1 (64 bit key, 64 byte blocks): 38131586 operations in 30 seconds (2440421504 bytes)
test 2 (64 bit key, 256 byte blocks): 10609299 operations in 30 seconds (2715980544 bytes)
test 3 (64 bit key, 1024 byte blocks): 2728723 operations in 30 seconds (2794212352 bytes)
test 4 (64 bit key, 8192 byte blocks): 343249 operations in 30 seconds (2811895808 bytes)

testing speed of cbc(cast5) decryption
test 0 (64 bit key, 16 byte blocks): 106721453 operations in 30 seconds (1707543248 bytes)
test 1 (64 bit key, 64 byte blocks): 37573333 operations in 30 seconds (2404693312 bytes)
test 2 (64 bit key, 256 byte blocks): 10624511 operations in 30 seconds (2719874816 bytes)
test 3 (64 bit key, 1024 byte blocks): 2745305 operations in 30 seconds (2811192320 bytes)
test 4 (64 bit key, 8192 byte blocks): 345642 operations in 30 seconds (2831499264 bytes)

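In sum, that is (total bytes over the five tests / 150 seconds, for
ecb encryption, ecb decryption, cbc encryption and cbc decryption, in that order):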
90264940 bytes/s
90914715 bytes/s
83728916 bytes/s
83165353 bytes/s


Best regards,
Nicolas Kaiser

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] crypto: cast5: simplify if-statements
  2010-10-27 10:03 [PATCH] crypto: cast5: simplify if-statements Nicolas Kaiser
@ 2010-11-04 18:59   ` Herbert Xu
  2010-11-04 18:59   ` Herbert Xu
  1 sibling, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2010-11-04 18:59 UTC (permalink / raw)
  To: Nicolas Kaiser
  Cc: David S. Miller, Kartikey Mahendra Bhatt, linux-crypto, linux-kernel

On Wed, Oct 27, 2010 at 12:03:24PM +0200, Nicolas Kaiser wrote:
> I noticed that by factoring out common rounds from the
> branches of the if-statements in the encryption and
> decryption functions, the executable file size goes down
> significantly, for crypto/cast5.ko from 26688 bytes
> to 24336 bytes (amd64).

Patch applied.  Thanks a lot!
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] crypto: cast5: simplify if-statements
@ 2010-11-04 18:59   ` Herbert Xu
  0 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2010-11-04 18:59 UTC (permalink / raw)
  To: Nicolas Kaiser
  Cc: David S. Miller, Kartikey Mahendra Bhatt, linux-crypto, linux-kernel

On Wed, Oct 27, 2010 at 12:03:24PM +0200, Nicolas Kaiser wrote:
> I noticed that by factoring out common rounds from the
> branches of the if-statements in the encryption and
> decryption functions, the executable file size goes down
> significantly, for crypto/cast5.ko from 26688 bytes
> to 24336 bytes (amd64).

Patch applied.  Thanks a lot!
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-11-04 18:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-27 10:03 [PATCH] crypto: cast5: simplify if-statements Nicolas Kaiser
2010-11-04 15:14 ` Nicolas Kaiser
2010-11-04 18:59 ` Herbert Xu
2010-11-04 18:59   ` Herbert Xu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.