All of lore.kernel.org
 help / color / mirror / Atom feed
* [[RFC] PATCH 1/4] crypto: blowfish: split generic and common c code
@ 2011-09-01 22:45 Jussi Kivilinna
  2011-09-01 22:45 ` [[RFC] PATCH 2/4] crypto: blowfish: rename C-version to blowfish_generic Jussi Kivilinna
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Jussi Kivilinna @ 2011-09-01 22:45 UTC (permalink / raw)
  To: linux-crypto; +Cc: Herbert Xu, David S. Miller

This patch splits the blowfish crypto routine, moving the common part (key
setup) into its own module so that it can be shared by the blowfish crypto
modules (x86_64 assembly and generic C).

Also fixes errors/warnings reported by checkpatch.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
 crypto/Kconfig            |   10 +
 crypto/Makefile           |    1 
 crypto/blowfish.c         |  367 +----------------------------------------
 crypto/blowfish_common.c  |  402 +++++++++++++++++++++++++++++++++++++++++++++
 include/crypto/blowfish.h |   23 +++
 5 files changed, 448 insertions(+), 355 deletions(-)
 create mode 100644 crypto/blowfish_common.c
 create mode 100644 include/crypto/blowfish.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 55c50cd..108cb98 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -600,6 +600,7 @@ config CRYPTO_ARC4
 config CRYPTO_BLOWFISH
 	tristate "Blowfish cipher algorithm"
 	select CRYPTO_ALGAPI
+	select CRYPTO_BLOWFISH_COMMON
 	help
 	  Blowfish cipher algorithm, by Bruce Schneier.
 
@@ -610,6 +611,15 @@ config CRYPTO_BLOWFISH
 	  See also:
 	  <http://www.schneier.com/blowfish.html>
 
+config CRYPTO_BLOWFISH_COMMON
+	tristate
+	help
+	  Common parts of the Blowfish cipher algorithm shared by the
+	  generic C and the assembler implementations.
+
+	  See also:
+	  <http://www.schneier.com/blowfish.html>
+
 config CRYPTO_CAMELLIA
 	tristate "Camellia cipher algorithms"
 	depends on CRYPTO
diff --git a/crypto/Makefile b/crypto/Makefile
index ce5a813..495b791 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
+obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
 obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
diff --git a/crypto/blowfish.c b/crypto/blowfish.c
index a67d52e..0f86d31 100644
--- a/crypto/blowfish.c
+++ b/crypto/blowfish.c
@@ -22,282 +22,7 @@
 #include <asm/byteorder.h>
 #include <linux/crypto.h>
 #include <linux/types.h>
-
-#define BF_BLOCK_SIZE 8
-#define BF_MIN_KEY_SIZE 4
-#define BF_MAX_KEY_SIZE 56
-
-struct bf_ctx {
-	u32 p[18];
-	u32 s[1024];
-};
-
-static const u32 bf_pbox[16 + 2] = {
-	0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
-	0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
-	0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
-	0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
-	0x9216d5d9, 0x8979fb1b,
-};
-
-static const u32 bf_sbox[256 * 4] = {
-	0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
-	0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
-	0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
-	0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
-	0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
-	0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
-	0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
-	0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
-	0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
-	0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
-	0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
-	0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
-	0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
-	0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
-	0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
-	0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
-	0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
-	0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
-	0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
-	0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
-	0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
-	0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
-	0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
-	0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
-	0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
-	0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
-	0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
-	0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
-	0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
-	0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
-	0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
-	0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
-	0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
-	0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
-	0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
-	0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
-	0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
-	0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
-	0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
-	0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
-	0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
-	0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
-	0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
-	0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
-	0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
-	0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
-	0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
-	0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
-	0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
-	0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
-	0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
-	0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
-	0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
-	0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
-	0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
-	0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
-	0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
-	0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
-	0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
-	0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
-	0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
-	0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
-	0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
-	0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
-	0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
-	0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
-	0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
-	0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
-	0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
-	0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
-	0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
-	0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
-	0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
-	0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
-	0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
-	0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
-	0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
-	0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
-	0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
-	0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
-	0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
-	0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
-	0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
-	0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
-	0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
-	0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
-	0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
-	0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
-	0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
-	0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
-	0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
-	0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
-	0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
-	0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
-	0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
-	0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
-	0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
-	0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
-	0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
-	0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
-	0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
-	0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
-	0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
-	0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
-	0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
-	0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
-	0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
-	0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
-	0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
-	0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
-	0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
-	0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
-	0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
-	0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
-	0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
-	0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
-	0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
-	0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
-	0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
-	0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
-	0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
-	0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
-	0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
-	0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
-	0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
-	0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
-	0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
-	0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
-	0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
-	0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
-	0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
-	0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
-	0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
-	0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
-	0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
-	0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
-	0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
-	0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
-	0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
-	0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
-	0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
-	0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
-	0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
-	0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
-	0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
-	0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
-	0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
-	0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
-	0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
-	0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
-	0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
-	0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
-	0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
-	0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
-	0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
-	0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
-	0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
-	0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
-	0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
-	0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
-	0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
-	0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
-	0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
-	0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
-	0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
-	0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
-	0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
-	0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
-	0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
-	0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
-	0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
-	0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
-	0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
-	0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
-	0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
-	0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
-	0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
-	0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
-	0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
-	0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
-	0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
-	0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
-	0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
-	0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
-	0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
-	0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
-	0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
-	0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
-	0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
-	0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
-	0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
-	0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
-	0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
-	0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
-	0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
-	0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
-	0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
-	0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
-	0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
-	0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
-	0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
-	0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
-	0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
-	0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
-	0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
-	0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
-	0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
-	0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
-	0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
-	0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
-	0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
-	0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
-	0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
-	0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
-	0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
-	0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
-	0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
-	0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
-	0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
-	0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
-	0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
-	0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
-	0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
-	0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
-	0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
-	0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
-	0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
-	0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
-	0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
-	0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
-	0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
-	0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
-	0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
-	0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
-	0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
-	0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
-	0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
-	0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
-	0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
-	0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
-	0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
-	0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
-	0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
-	0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
-	0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
-	0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
-	0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
-	0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
-	0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
-	0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
-	0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
-	0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
-	0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
-	0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
-	0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
-	0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
-};
+#include <crypto/blowfish.h>
 
 /*
  * Round loop unrolling macros, S is a pointer to a S-Box array
@@ -313,16 +38,15 @@ static const u32 bf_sbox[256 * 4] = {
 
 #define ROUND(a, b, n)  b ^= P[n]; a ^= bf_F (b)
 
-/*
- * The blowfish encipher, processes 64-bit blocks.
- * NOTE: This function MUSTN'T respect endianess
- */
-static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src)
+static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	const u32 *P = bctx->p;
-	const u32 *S = bctx->s;
-	u32 yl = src[0];
-	u32 yr = src[1];
+	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
+	const __be32 *in_blk = (const __be32 *)src;
+	__be32 *const out_blk = (__be32 *)dst;
+	const u32 *P = ctx->p;
+	const u32 *S = ctx->s;
+	u32 yl = be32_to_cpu(in_blk[0]);
+	u32 yr = be32_to_cpu(in_blk[1]);
 
 	ROUND(yr, yl, 0);
 	ROUND(yl, yr, 1);
@@ -344,21 +68,8 @@ static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src)
 	yl ^= P[16];
 	yr ^= P[17];
 
-	dst[0] = yr;
-	dst[1] = yl;
-}
-
-static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	const __be32 *in_blk = (const __be32 *)src;
-	__be32 *const out_blk = (__be32 *)dst;
-	u32 in32[2], out32[2];
-
-	in32[0] = be32_to_cpu(in_blk[0]);
-	in32[1] = be32_to_cpu(in_blk[1]);
-	encrypt_block(crypto_tfm_ctx(tfm), out32, in32);
-	out_blk[0] = cpu_to_be32(out32[0]);
-	out_blk[1] = cpu_to_be32(out32[1]);
+	out_blk[0] = cpu_to_be32(yr);
+	out_blk[1] = cpu_to_be32(yl);
 }
 
 static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -395,60 +106,6 @@ static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	out_blk[1] = cpu_to_be32(yl);
 }
 
-/*
- * Calculates the blowfish S and P boxes for encryption and decryption.
- */
-static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
-{
-	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *P = ctx->p;
-	u32 *S = ctx->s;
-	short i, j, count;
-	u32 data[2], temp;
-
-	/* Copy the initialization s-boxes */
-	for (i = 0, count = 0; i < 256; i++)
-		for (j = 0; j < 4; j++, count++)
-			S[count] = bf_sbox[count];
-
-	/* Set the p-boxes */
-	for (i = 0; i < 16 + 2; i++)
-		P[i] = bf_pbox[i];
-
-	/* Actual subkey generation */
-	for (j = 0, i = 0; i < 16 + 2; i++) {
-		temp = (((u32)key[j] << 24) |
-			((u32)key[(j + 1) % keylen] << 16) |
-			((u32)key[(j + 2) % keylen] << 8) |
-			((u32)key[(j + 3) % keylen]));
-
-		P[i] = P[i] ^ temp;
-		j = (j + 4) % keylen;
-	}
-
-	data[0] = 0x00000000;
-	data[1] = 0x00000000;
-
-	for (i = 0; i < 16 + 2; i += 2) {
-		encrypt_block((struct bf_ctx *)ctx, data, data);
-
-		P[i] = data[0];
-		P[i + 1] = data[1];
-	}
-
-	for (i = 0; i < 4; i++) {
-		for (j = 0, count = i * 256; j < 256; j += 2, count += 2) {
-			encrypt_block((struct bf_ctx *)ctx, data, data);
-
-			S[count] = data[0];
-			S[count + 1] = data[1];
-		}
-	}
-
-	/* Bruce says not to bother with the weak key check. */
-	return 0;
-}
-
 static struct crypto_alg alg = {
 	.cra_name		=	"blowfish",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
@@ -460,7 +117,7 @@ static struct crypto_alg alg = {
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	BF_MIN_KEY_SIZE,
 	.cia_max_keysize	=	BF_MAX_KEY_SIZE,
-	.cia_setkey   		= 	bf_setkey,
+	.cia_setkey		=	blowfish_setkey,
 	.cia_encrypt 		=	bf_encrypt,
 	.cia_decrypt  		=	bf_decrypt } }
 };
diff --git a/crypto/blowfish_common.c b/crypto/blowfish_common.c
new file mode 100644
index 0000000..f636aab
--- /dev/null
+++ b/crypto/blowfish_common.c
@@ -0,0 +1,402 @@
+/*
+ * Cryptographic API.
+ *
+ * Common Blowfish algorithm parts shared between the C and assembler
+ * implementations.
+ *
+ * Blowfish Cipher Algorithm, by Bruce Schneier.
+ * http://www.counterpane.com/blowfish.html
+ *
+ * Adapted from Kerneli implementation.
+ *
+ * Copyright (c) Herbert Valerio Riedel <hvr@hvrlab.org>
+ * Copyright (c) Kyle McMartin <kyle@debian.org>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/byteorder.h>
+#include <linux/crypto.h>
+#include <linux/types.h>
+#include <crypto/blowfish.h>
+
+static const u32 bf_pbox[16 + 2] = {
+	0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
+	0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
+	0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
+	0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
+	0x9216d5d9, 0x8979fb1b,
+};
+
+static const u32 bf_sbox[256 * 4] = {
+	0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
+	0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
+	0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
+	0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
+	0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
+	0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
+	0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
+	0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
+	0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
+	0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
+	0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
+	0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
+	0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
+	0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
+	0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
+	0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
+	0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
+	0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
+	0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
+	0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
+	0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
+	0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
+	0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
+	0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
+	0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
+	0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
+	0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
+	0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
+	0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
+	0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
+	0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
+	0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
+	0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
+	0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
+	0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
+	0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
+	0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
+	0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
+	0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
+	0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
+	0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
+	0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
+	0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
+	0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
+	0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
+	0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
+	0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
+	0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
+	0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
+	0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
+	0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
+	0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
+	0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
+	0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
+	0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
+	0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
+	0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
+	0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
+	0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
+	0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
+	0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
+	0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
+	0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
+	0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
+	0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
+	0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
+	0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
+	0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
+	0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
+	0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
+	0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
+	0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
+	0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
+	0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
+	0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
+	0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
+	0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
+	0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
+	0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
+	0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
+	0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
+	0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
+	0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
+	0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
+	0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
+	0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
+	0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
+	0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
+	0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
+	0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
+	0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
+	0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
+	0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
+	0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
+	0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
+	0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
+	0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
+	0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
+	0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
+	0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
+	0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
+	0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
+	0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
+	0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
+	0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
+	0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
+	0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
+	0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
+	0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
+	0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
+	0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
+	0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
+	0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
+	0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
+	0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
+	0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
+	0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
+	0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
+	0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
+	0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
+	0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
+	0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
+	0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
+	0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
+	0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
+	0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
+	0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
+	0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
+	0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
+	0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
+	0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
+	0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
+	0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
+	0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
+	0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
+	0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
+	0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
+	0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
+	0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
+	0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
+	0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
+	0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
+	0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
+	0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
+	0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
+	0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
+	0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
+	0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
+	0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
+	0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
+	0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
+	0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
+	0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
+	0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
+	0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
+	0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
+	0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
+	0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
+	0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
+	0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
+	0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
+	0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
+	0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
+	0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
+	0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
+	0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
+	0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
+	0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
+	0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
+	0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
+	0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
+	0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
+	0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
+	0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
+	0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
+	0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
+	0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
+	0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
+	0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
+	0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
+	0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
+	0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
+	0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
+	0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
+	0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
+	0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
+	0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
+	0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
+	0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
+	0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
+	0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
+	0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
+	0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
+	0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
+	0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
+	0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
+	0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
+	0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
+	0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
+	0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
+	0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
+	0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
+	0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
+	0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
+	0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
+	0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
+	0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
+	0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
+	0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
+	0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
+	0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
+	0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
+	0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
+	0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
+	0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
+	0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
+	0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
+	0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
+	0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
+	0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
+	0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
+	0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
+	0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
+	0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
+	0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
+	0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
+	0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
+	0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
+	0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
+	0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
+	0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
+	0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
+	0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
+	0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
+	0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
+	0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
+	0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
+	0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
+	0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
+	0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
+	0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
+	0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
+	0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
+	0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
+	0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
+	0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
+	0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
+	0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
+	0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
+	0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
+	0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
+	0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
+	0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
+	0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
+	0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
+	0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
+};
+
+/*
+ * Round loop unrolling macros.  S points to the four 256-entry u32
+ * S-boxes stored back to back; P (used by ROUND) is the subkey array.
+ */
+#define GET32_3(x) (((x) & 0xff)) /* bits 7..0 */
+#define GET32_2(x) (((x) >> (8)) & (0xff)) /* bits 15..8 */
+#define GET32_1(x) (((x) >> (16)) & (0xff)) /* bits 23..16 */
+#define GET32_0(x) (((x) >> (24)) & (0xff)) /* bits 31..24 */
+
+#define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^ \
+		S[512 + GET32_2(x)]) + S[768 + GET32_3(x)])
+
+#define ROUND(a, b, n) ({ b ^= P[n]; a ^= bf_F(b); })
+
+/*
+ * The blowfish encipher, processes one 64-bit block (two u32 words).
+ * NOTE: words are used in CPU byte order; no endianness conversion here.
+ */
+static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src)
+{
+	const u32 *P = bctx->p;
+	const u32 *S = bctx->s;
+	u32 yl = src[0];
+	u32 yr = src[1];
+
+	ROUND(yr, yl, 0);
+	ROUND(yl, yr, 1);
+	ROUND(yr, yl, 2);
+	ROUND(yl, yr, 3);
+	ROUND(yr, yl, 4);
+	ROUND(yl, yr, 5);
+	ROUND(yr, yl, 6);
+	ROUND(yl, yr, 7);
+	ROUND(yr, yl, 8);
+	ROUND(yl, yr, 9);
+	ROUND(yr, yl, 10);
+	ROUND(yl, yr, 11);
+	ROUND(yr, yl, 12);
+	ROUND(yl, yr, 13);
+	ROUND(yr, yl, 14);
+	ROUND(yl, yr, 15);
+
+	yl ^= P[16];
+	yr ^= P[17];
+
+	dst[0] = yr;	/* the two halves are written out swapped */
+	dst[1] = yl;
+}
+
+/*
+ * Key setup: derives the P and S boxes (ctx->p, ctx->s) from key; returns 0.
+ */
+int blowfish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+{
+	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *P = ctx->p;
+	u32 *S = ctx->s;
+	short i, j, count;
+	u32 data[2], temp;
+
+	/* Start from the constant initial s-boxes (bf_sbox) */
+	for (i = 0, count = 0; i < 256; i++)
+		for (j = 0; j < 4; j++, count++)
+			S[count] = bf_sbox[count];
+
+	/* ... and from the constant initial p-boxes (bf_pbox) */
+	for (i = 0; i < 16 + 2; i++)
+		P[i] = bf_pbox[i];
+
+	/* XOR the key into P as big-endian words, wrapping at keylen */
+	for (j = 0, i = 0; i < 16 + 2; i++) {
+		temp = (((u32)key[j] << 24) |
+			((u32)key[(j + 1) % keylen] << 16) |
+			((u32)key[(j + 2) % keylen] << 8) |
+			((u32)key[(j + 3) % keylen]));
+
+		P[i] = P[i] ^ temp;
+		j = (j + 4) % keylen;	/* NOTE(review): assumes keylen != 0 */
+	}
+
+	data[0] = 0x00000000;
+	data[1] = 0x00000000;
+
+	for (i = 0; i < 16 + 2; i += 2) {	/* refill P from chained output */
+		encrypt_block((struct bf_ctx *)ctx, data, data);
+
+		P[i] = data[0];
+		P[i + 1] = data[1];
+	}
+
+	for (i = 0; i < 4; i++) {	/* then refill each 256-word s-box */
+		for (j = 0, count = i * 256; j < 256; j += 2, count += 2) {
+			encrypt_block((struct bf_ctx *)ctx, data, data);
+
+			S[count] = data[0];
+			S[count + 1] = data[1];
+		}
+	}
+
+	/* Bruce says not to bother with the weak key check. */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blowfish_setkey);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Blowfish Cipher common functions");
diff --git a/include/crypto/blowfish.h b/include/crypto/blowfish.h
new file mode 100644
index 0000000..1450d4a
--- /dev/null
+++ b/include/crypto/blowfish.h
@@ -0,0 +1,23 @@
+/*
+ * Common values for blowfish algorithms
+ */
+
+#ifndef _CRYPTO_BLOWFISH_H
+#define _CRYPTO_BLOWFISH_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define BF_BLOCK_SIZE 8
+#define BF_MIN_KEY_SIZE 4
+#define BF_MAX_KEY_SIZE 56
+
+struct bf_ctx {
+	u32 p[18];	/* subkeys: 16 round keys + 2 final XOR words */
+	u32 s[1024];	/* four 256-entry S-boxes, back to back */
+};
+
+int blowfish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		    unsigned int key_len);
+
+#endif

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [[RFC] PATCH 2/4] crypto: blowfish: rename C-version to blowfish_generic
  2011-09-01 22:45 [[RFC] PATCH 1/4] crypto: blowfish: split generic and common c code Jussi Kivilinna
@ 2011-09-01 22:45 ` Jussi Kivilinna
  2011-09-01 22:45 ` [[RFC] PATCH 3/4] crypto: tcrypt: add ctr(blowfish) speed test Jussi Kivilinna
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Jussi Kivilinna @ 2011-09-01 22:45 UTC (permalink / raw)
  To: linux-crypto; +Cc: Herbert Xu, David S. Miller

Rename blowfish to blowfish_generic so that assembler versions of the blowfish
cipher can autoload. A module alias 'blowfish' is added.

Also fix checkpatch warnings.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
 crypto/Makefile           |    2 -
 crypto/blowfish.c         |  139 --------------------------------------------
 crypto/blowfish_generic.c |  142 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 140 deletions(-)
 delete mode 100644 crypto/blowfish.c
 create mode 100644 crypto/blowfish_generic.c

diff --git a/crypto/Makefile b/crypto/Makefile
index 495b791..fa8cbbb 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
-obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
+obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
 obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
diff --git a/crypto/blowfish.c b/crypto/blowfish.c
deleted file mode 100644
index 0f86d31..0000000
--- a/crypto/blowfish.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Blowfish Cipher Algorithm, by Bruce Schneier.
- * http://www.counterpane.com/blowfish.html
- *
- * Adapted from Kerneli implementation.
- *
- * Copyright (c) Herbert Valerio Riedel <hvr@hvrlab.org>
- * Copyright (c) Kyle McMartin <kyle@debian.org>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <asm/byteorder.h>
-#include <linux/crypto.h>
-#include <linux/types.h>
-#include <crypto/blowfish.h>
-
-/*
- * Round loop unrolling macros, S is a pointer to a S-Box array
- * organized in 4 unsigned longs at a row.
- */
-#define GET32_3(x) (((x) & 0xff))
-#define GET32_2(x) (((x) >> (8)) & (0xff))
-#define GET32_1(x) (((x) >> (16)) & (0xff))
-#define GET32_0(x) (((x) >> (24)) & (0xff))
-
-#define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^ \
-          S[512 + GET32_2(x)]) + S[768 + GET32_3(x)])
-
-#define ROUND(a, b, n)  b ^= P[n]; a ^= bf_F (b)
-
-static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
-	const __be32 *in_blk = (const __be32 *)src;
-	__be32 *const out_blk = (__be32 *)dst;
-	const u32 *P = ctx->p;
-	const u32 *S = ctx->s;
-	u32 yl = be32_to_cpu(in_blk[0]);
-	u32 yr = be32_to_cpu(in_blk[1]);
-
-	ROUND(yr, yl, 0);
-	ROUND(yl, yr, 1);
-	ROUND(yr, yl, 2);
-	ROUND(yl, yr, 3);
-	ROUND(yr, yl, 4);
-	ROUND(yl, yr, 5);
-	ROUND(yr, yl, 6);
-	ROUND(yl, yr, 7);
-	ROUND(yr, yl, 8);
-	ROUND(yl, yr, 9);
-	ROUND(yr, yl, 10);
-	ROUND(yl, yr, 11);
-	ROUND(yr, yl, 12);
-	ROUND(yl, yr, 13);
-	ROUND(yr, yl, 14);
-	ROUND(yl, yr, 15);
-
-	yl ^= P[16];
-	yr ^= P[17];
-
-	out_blk[0] = cpu_to_be32(yr);
-	out_blk[1] = cpu_to_be32(yl);
-}
-
-static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
-	const __be32 *in_blk = (const __be32 *)src;
-	__be32 *const out_blk = (__be32 *)dst;
-	const u32 *P = ctx->p;
-	const u32 *S = ctx->s;
-	u32 yl = be32_to_cpu(in_blk[0]);
-	u32 yr = be32_to_cpu(in_blk[1]);
-
-	ROUND(yr, yl, 17);
-	ROUND(yl, yr, 16);
-	ROUND(yr, yl, 15);
-	ROUND(yl, yr, 14);
-	ROUND(yr, yl, 13);
-	ROUND(yl, yr, 12);
-	ROUND(yr, yl, 11);
-	ROUND(yl, yr, 10);
-	ROUND(yr, yl, 9);
-	ROUND(yl, yr, 8);
-	ROUND(yr, yl, 7);
-	ROUND(yl, yr, 6);
-	ROUND(yr, yl, 5);
-	ROUND(yl, yr, 4);
-	ROUND(yr, yl, 3);
-	ROUND(yl, yr, 2);
-
-	yl ^= P[1];
-	yr ^= P[0];
-
-	out_blk[0] = cpu_to_be32(yr);
-	out_blk[1] = cpu_to_be32(yl);
-}
-
-static struct crypto_alg alg = {
-	.cra_name		=	"blowfish",
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	BF_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct bf_ctx),
-	.cra_alignmask		=	3,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
-	.cra_u			=	{ .cipher = {
-	.cia_min_keysize	=	BF_MIN_KEY_SIZE,
-	.cia_max_keysize	=	BF_MAX_KEY_SIZE,
-	.cia_setkey		=	blowfish_setkey,
-	.cia_encrypt 		=	bf_encrypt,
-	.cia_decrypt  		=	bf_decrypt } }
-};
-
-static int __init blowfish_mod_init(void)
-{
-	return crypto_register_alg(&alg);
-}
-
-static void __exit blowfish_mod_fini(void)
-{
-	crypto_unregister_alg(&alg);
-}
-
-module_init(blowfish_mod_init);
-module_exit(blowfish_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Blowfish Cipher Algorithm");
diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c
new file mode 100644
index 0000000..6f269b5
--- /dev/null
+++ b/crypto/blowfish_generic.c
@@ -0,0 +1,142 @@
+/*
+ * Cryptographic API.
+ *
+ * Blowfish Cipher Algorithm, by Bruce Schneier.
+ * http://www.counterpane.com/blowfish.html
+ *
+ * Adapted from Kerneli implementation.
+ *
+ * Copyright (c) Herbert Valerio Riedel <hvr@hvrlab.org>
+ * Copyright (c) Kyle McMartin <kyle@debian.org>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/byteorder.h>
+#include <linux/crypto.h>
+#include <linux/types.h>
+#include <crypto/blowfish.h>
+
+/*
+ * Round loop unrolling macros.  S points to the four 256-entry u32
+ * S-boxes stored back to back; P (used by ROUND) is the subkey array.
+ */
+#define GET32_3(x) (((x) & 0xff)) /* bits 7..0 */
+#define GET32_2(x) (((x) >> (8)) & (0xff)) /* bits 15..8 */
+#define GET32_1(x) (((x) >> (16)) & (0xff)) /* bits 23..16 */
+#define GET32_0(x) (((x) >> (24)) & (0xff)) /* bits 31..24 */
+
+#define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^ \
+		S[512 + GET32_2(x)]) + S[768 + GET32_3(x)])
+
+#define ROUND(a, b, n) ({ b ^= P[n]; a ^= bf_F(b); })
+
+static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
+	const __be32 *in_blk = (const __be32 *)src;
+	__be32 *const out_blk = (__be32 *)dst;
+	const u32 *P = ctx->p;
+	const u32 *S = ctx->s;
+	u32 yl = be32_to_cpu(in_blk[0]);
+	u32 yr = be32_to_cpu(in_blk[1]);
+
+	ROUND(yr, yl, 0);
+	ROUND(yl, yr, 1);
+	ROUND(yr, yl, 2);
+	ROUND(yl, yr, 3);
+	ROUND(yr, yl, 4);
+	ROUND(yl, yr, 5);
+	ROUND(yr, yl, 6);
+	ROUND(yl, yr, 7);
+	ROUND(yr, yl, 8);
+	ROUND(yl, yr, 9);
+	ROUND(yr, yl, 10);
+	ROUND(yl, yr, 11);
+	ROUND(yr, yl, 12);
+	ROUND(yl, yr, 13);
+	ROUND(yr, yl, 14);
+	ROUND(yl, yr, 15);
+
+	yl ^= P[16];
+	yr ^= P[17];
+
+	out_blk[0] = cpu_to_be32(yr);
+	out_blk[1] = cpu_to_be32(yl);
+}
+
+static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
+	const __be32 *in_blk = (const __be32 *)src;
+	__be32 *const out_blk = (__be32 *)dst;
+	const u32 *P = ctx->p;
+	const u32 *S = ctx->s;
+	u32 yl = be32_to_cpu(in_blk[0]);
+	u32 yr = be32_to_cpu(in_blk[1]);
+
+	ROUND(yr, yl, 17);
+	ROUND(yl, yr, 16);
+	ROUND(yr, yl, 15);
+	ROUND(yl, yr, 14);
+	ROUND(yr, yl, 13);
+	ROUND(yl, yr, 12);
+	ROUND(yr, yl, 11);
+	ROUND(yl, yr, 10);
+	ROUND(yr, yl, 9);
+	ROUND(yl, yr, 8);
+	ROUND(yr, yl, 7);
+	ROUND(yl, yr, 6);
+	ROUND(yr, yl, 5);
+	ROUND(yl, yr, 4);
+	ROUND(yr, yl, 3);
+	ROUND(yl, yr, 2);
+
+	yl ^= P[1];
+	yr ^= P[0];
+
+	out_blk[0] = cpu_to_be32(yr);
+	out_blk[1] = cpu_to_be32(yl);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"blowfish",
+	.cra_driver_name	=	"blowfish-generic",
+	.cra_priority		=	100,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	BF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct bf_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{ .cipher = {
+	.cia_min_keysize	=	BF_MIN_KEY_SIZE,
+	.cia_max_keysize	=	BF_MAX_KEY_SIZE,
+	.cia_setkey		=	blowfish_setkey,
+	.cia_encrypt		=	bf_encrypt,
+	.cia_decrypt		=	bf_decrypt } }
+};
+
+static int __init blowfish_mod_init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit blowfish_mod_fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(blowfish_mod_init);
+module_exit(blowfish_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Blowfish Cipher Algorithm");
+MODULE_ALIAS("blowfish");

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [[RFC] PATCH 3/4] crypto: tcrypt: add ctr(blowfish) speed test
  2011-09-01 22:45 [[RFC] PATCH 1/4] crypto: blowfish: split generic and common c code Jussi Kivilinna
  2011-09-01 22:45 ` [[RFC] PATCH 2/4] crypto: blowfish: rename C-version to blowfish_generic Jussi Kivilinna
@ 2011-09-01 22:45 ` Jussi Kivilinna
  2011-09-01 22:45 ` [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation Jussi Kivilinna
  2011-09-22 11:29 ` [PATCH 1/4] crypto: blowfish - split generic and common c code Herbert Xu
  3 siblings, 0 replies; 8+ messages in thread
From: Jussi Kivilinna @ 2011-09-01 22:45 UTC (permalink / raw)
  To: linux-crypto; +Cc: Herbert Xu, David S. Miller

Add a ctr(blowfish) speed test to obtain results for the blowfish x86_64
assembly patch.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
 crypto/tcrypt.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 2222617..e353a28 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1050,6 +1050,10 @@ static int do_test(int m)
 				  speed_template_8_32);
 		test_cipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0,
 				  speed_template_8_32);
+		test_cipher_speed("ctr(blowfish)", ENCRYPT, sec, NULL, 0,
+				  speed_template_8_32);
+		test_cipher_speed("ctr(blowfish)", DECRYPT, sec, NULL, 0,
+				  speed_template_8_32);
 		break;
 
 	case 204:

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation
  2011-09-01 22:45 [[RFC] PATCH 1/4] crypto: blowfish: split generic and common c code Jussi Kivilinna
  2011-09-01 22:45 ` [[RFC] PATCH 2/4] crypto: blowfish: rename C-version to blowfish_generic Jussi Kivilinna
  2011-09-01 22:45 ` [[RFC] PATCH 3/4] crypto: tcrypt: add ctr(blowfish) speed test Jussi Kivilinna
@ 2011-09-01 22:45 ` Jussi Kivilinna
  2011-09-19 11:19   ` Jussi Kivilinna
  2011-09-22 11:29 ` [PATCH 1/4] crypto: blowfish - split generic and common c code Herbert Xu
  3 siblings, 1 reply; 8+ messages in thread
From: Jussi Kivilinna @ 2011-09-01 22:45 UTC (permalink / raw)
  To: linux-crypto; +Cc: Herbert Xu, David S. Miller

Patch adds x86_64 assembly implementation of blowfish. Two sets of assembler
functions are provided. The first set is the regular 'one block at a time'
encrypt/decrypt functions. The second is 'four blocks at a time' functions that
gain a performance increase on out-of-order CPUs. Performance of the 4-way
functions should be equal to the 1-way functions on in-order CPUs.

Summary of the tcrypt benchmarks:

Blowfish assembler vs blowfish C (256bit 8kb block ECB)
encrypt: 2.2x speed
decrypt: 2.3x speed

Blowfish assembler vs blowfish C (256bit 8kb block CBC)
encrypt: 1.12x speed
decrypt: 2.5x speed

Blowfish assembler vs blowfish C (256bit 8kb block CTR)
encrypt: 2.5x speed

Full output:
http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-blowfish-asm-x86_64.txt
http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-blowfish-c-x86_64.txt

Tests were run on:
 vendor_id	: AuthenticAMD
 cpu family	: 16
 model		: 10
 model name	: AMD Phenom(tm) II X6 1055T Processor
 stepping	: 0

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
 arch/x86/crypto/Makefile                 |    2 
 arch/x86/crypto/blowfish-x86_64-asm_64.S |  392 ++++++++++++++++++++++++
 arch/x86/crypto/blowfish_glue.c          |  487 ++++++++++++++++++++++++++++++
 crypto/Kconfig                           |   15 +
 4 files changed, 896 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/crypto/blowfish-x86_64-asm_64.S
 create mode 100644 arch/x86/crypto/blowfish_glue.c

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 57c7f7b..725addf 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -20,6 +21,7 @@ twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
 salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
new file mode 100644
index 0000000..44eb23a
--- /dev/null
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -0,0 +1,392 @@
+/*
+ * Blowfish Cipher Algorithm (x86_64)
+ *
+ * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "blowfish-x86_64-asm.S"
+.text
+
+/* structure of crypto context */
+#define p	0
+#define s0	((16 + 2) * 4)
+#define s1	((16 + 2 + (1 * 256)) * 4)
+#define s2	((16 + 2 + (2 * 256)) * 4)
+#define s3	((16 + 2 + (3 * 256)) * 4)
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rcx
+#define RX3 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %ecx
+#define RX3d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %cl
+#define RX3bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %ch
+#define RX3bh %dh
+
+#define RT0 %rbp
+#define RT1 %rsi
+
+#define RT0d %ebp
+#define RT1d %esi
+
+#define RK0 %r8
+#define RK1 %r9
+#define RK2 %r10
+#define RK3 %r11
+
+#define RK0d %r8d
+#define RK1d %r9d
+#define RK2d %r10d
+#define RK3d %r11d
+
+#define RKEY %r12
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F(x, k) \
+	rorq $16,		x; \
+	movzbl x ## bh,		RT0d; \
+	movzbl x ## bl,		RT1d; \
+	rolq $16,		x; \
+	movl s0(CTX,RT0,4),	k ## d; \
+	addl s1(CTX,RT1,4),	k ## d; \
+	movzbl x ## bh,		RT0d; \
+	movzbl x ## bl,		RT1d; \
+	rolq $32,		x; \
+	xorl s2(CTX,RT0,4),	k ## d; \
+	addl s3(CTX,RT1,4),	k ## d; \
+	xorq k,			x;
+
+#define add_roundkey_enc(n) \
+	xorq p+4*(n)(CTX), 	RX0;
+
+#define round_enc(n) \
+	add_roundkey_enc(n); \
+	\
+	F(RX0, RK0); \
+	F(RX0, RK0);
+
+#define round_final_enc(n) \
+	xorq p+4*(n)(CTX), 	RX0;
+
+#define add_roundkey_dec(n) \
+	movq p+4*(n-1)(CTX),	RT0; \
+	rorq $32,		RT0; \
+	xorq RT0,		RX0;
+
+#define round_dec(n) /* XOR in P[n-1],P[n], then apply F twice */ \
+	add_roundkey_dec(n); \
+	\
+	F(RX0, RK0); \
+	F(RX0, RK0);
+
+#define read_block() \
+	movq (RIO), 		RX0; \
+	rorq $32, 		RX0; \
+	bswapq 			RX0;
+
+#define write_block() \
+	bswapq 			RX0; \
+	movq RX0, 		(RIO);
+
+#define xor_block() \
+	bswapq 			RX0; \
+	xorq RX0, 		(RIO);
+
+.align 8
+.global __blowfish_enc_blk
+.type   __blowfish_enc_blk,@function;
+
+__blowfish_enc_blk:
+	// input:
+	//	%rdi: ctx, CTX
+	//	%rsi: dst
+	//	%rdx: src
+	//	%rcx: bool xor
+	pushq %rbp;
+	pushq %rbx;
+
+	pushq %rsi;
+	pushq %rcx;
+	movq %rdx, RIO;
+
+	read_block();
+
+	round_enc(0);
+	round_enc(2);
+	round_enc(4);
+	round_enc(6);
+	round_enc(8);
+	round_enc(10);
+	round_enc(12);
+	round_enc(14);
+	add_roundkey_enc(16);
+
+	popq %rbp;
+	popq RIO;
+
+	test %bpl, %bpl;
+	jnz __enc_xor;
+
+	write_block();
+
+__enc_ret:
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+
+__enc_xor:
+	xor_block();
+
+	jmp __enc_ret;
+
+.align 8
+.global blowfish_dec_blk
+.type   blowfish_dec_blk,@function;
+
+blowfish_dec_blk:
+	// input:
+	//	%rdi: ctx, CTX
+	//	%rsi: dst
+	//	%rdx: src
+	pushq %rbp;
+	pushq %rbx;
+
+	pushq %rsi;
+	movq %rdx, RIO;
+
+	read_block();
+
+	round_dec(17);
+	round_dec(15);
+	round_dec(13);
+	round_dec(11);
+	round_dec(9);
+	round_dec(7);
+	round_dec(5);
+	round_dec(3);
+	add_roundkey_dec(1);
+
+	popq RIO;
+	write_block();
+
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+
+/**********************************************************************
+  4-way blowfish, four blocks parallel
+ **********************************************************************/
+#define add_preloaded_roundkey4() \
+	xorq RKEY,		RX0; \
+	xorq RKEY,		RX1; \
+	xorq RKEY,		RX2; \
+	xorq RKEY,		RX3;
+
+#define preload_roundkey_enc(n) \
+	movq p+4*(n)(CTX),	RKEY;
+
+#define add_roundkey_enc4(n) \
+	add_preloaded_roundkey4(); \
+	preload_roundkey_enc(n + 2);
+
+#define round_enc4(n) \
+	add_roundkey_enc4(n); \
+	\
+	F(RX0, RK0); \
+	F(RX1, RK1); \
+	F(RX2, RK2); \
+	F(RX3, RK3); \
+	\
+	F(RX0, RK0); \
+	F(RX1, RK1); \
+	F(RX2, RK2); \
+	F(RX3, RK3);
+
+#define preload_roundkey_dec(n) \
+	movq p+4*((n)-1)(CTX),	RKEY; \
+	rorq $32,		RKEY;
+
+#define add_roundkey_dec4(n) \
+	add_preloaded_roundkey4(); \
+	preload_roundkey_dec(n - 2);
+
+#define round_dec4(n) \
+	add_roundkey_dec4(n); \
+	\
+	F(RX0, RK0); \
+	F(RX1, RK1); \
+	F(RX2, RK2); \
+	F(RX3, RK3); \
+	\
+	F(RX0, RK0); \
+	F(RX1, RK1); \
+	F(RX2, RK2); \
+	F(RX3, RK3);
+
+#define read_block4() \
+	movq (RIO),		RX0; \
+	rorq $32,		RX0; \
+	bswapq 			RX0; \
+	\
+	movq 8(RIO),		RX1; \
+	rorq $32,		RX1; \
+	bswapq 			RX1; \
+	\
+	movq 16(RIO),		RX2; \
+	rorq $32,		RX2; \
+	bswapq 			RX2; \
+	\
+	movq 24(RIO),		RX3; \
+	rorq $32,		RX3; \
+	bswapq 			RX3;
+
+#define write_block4() \
+	bswapq 			RX0; \
+	movq RX0,		(RIO); \
+	\
+	bswapq 			RX1; \
+	movq RX1,		8(RIO); \
+	\
+	bswapq 			RX2; \
+	movq RX2,		16(RIO); \
+	\
+	bswapq 			RX3; \
+	movq RX3,		24(RIO);
+
+#define xor_block4() \
+	bswapq 			RX0; \
+	xorq RX0,		(RIO); \
+	\
+	bswapq 			RX1; \
+	xorq RX1,		8(RIO); \
+	\
+	bswapq 			RX2; \
+	xorq RX2,		16(RIO); \
+	\
+	bswapq 			RX3; \
+	xorq RX3,		24(RIO);
+
+.align 8
+.global __blowfish_enc_blk_4way
+.type   __blowfish_enc_blk_4way,@function;
+
+__blowfish_enc_blk_4way:
+	// input:
+	//	%rdi: ctx, CTX
+	//	%rsi: dst
+	//	%rdx: src
+	//	%rcx: bool xor
+	pushq %rbp;
+	pushq %rbx;
+	pushq RKEY;
+	preload_roundkey_enc(0);
+
+	pushq %rsi;
+	pushq %rcx;
+	movq %rdx, RIO;
+
+	read_block4();
+
+	round_enc4(0);
+	round_enc4(2);
+	round_enc4(4);
+	round_enc4(6);
+	round_enc4(8);
+	round_enc4(10);
+	round_enc4(12);
+	round_enc4(14);
+	add_preloaded_roundkey4();
+
+	popq %rbp;
+	popq RIO;
+
+	test %bpl, %bpl;
+	jnz __enc_xor4;
+
+	write_block4();
+
+__enc_ret4:
+	popq RKEY;
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+
+__enc_xor4:
+	xor_block4();
+
+	jmp __enc_ret4;
+
+.align 8
+.global blowfish_dec_blk_4way
+.type   blowfish_dec_blk_4way,@function;
+
+blowfish_dec_blk_4way:
+	// input:
+	//	%rdi: ctx, CTX
+	//	%rsi: dst
+	//	%rdx: src
+	pushq %rbp;
+	pushq %rbx;
+	pushq RKEY;
+	preload_roundkey_dec(17);
+
+	pushq %rsi;
+	movq %rdx, RIO;
+
+	read_block4();
+
+	round_dec4(17);
+	round_dec4(15);
+	round_dec4(13);
+	round_dec4(11);
+	round_dec4(9);
+	round_dec4(7);
+	round_dec4(5);
+	round_dec4(3);
+	add_preloaded_roundkey4();
+
+	popq RIO;
+	write_block4();
+
+	popq RKEY;
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
new file mode 100644
index 0000000..40911ab
--- /dev/null
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -0,0 +1,487 @@
+/*
+ * Glue Code for assembler optimized version of Blowfish
+ *
+ * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <crypto/blowfish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+
+/* regular block cipher functions */
+asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
+				   bool xor);
+asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
+
+/* 4-way parallel cipher functions */
+asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src, bool xor);
+asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
+				      const u8 *src);
+
+static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, true);
+}
+
+static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					 const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, true);
+}
+
+static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static struct crypto_alg bf_alg = {
+	.cra_name		=	"blowfish",
+	.cra_driver_name	=	"blowfish-asm",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	BF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct bf_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(bf_alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	BF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	BF_MAX_KEY_SIZE,
+			.cia_setkey		=	blowfish_setkey,
+			.cia_encrypt		=	blowfish_encrypt,
+			.cia_decrypt		=	blowfish_decrypt,
+		}
+	}
+};
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
+		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		/* Use the 4-way routine while at least four blocks remain */
+		if (nbytes >= bsize * 4) {
+			do {
+				fn_4way(ctx, wdst, wsrc);
+
+				wsrc += bsize * 4;
+				wdst += bsize * 4;
+				nbytes -= bsize * 4;
+			} while (nbytes >= bsize * 4);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Remaining whole blocks go through the 1-way routine */
+		do {
+			fn(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		/* nbytes is now the count of bytes left unprocessed */
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
+}
+
+static struct crypto_alg blk_ecb_alg = {
+	.cra_name		= "ecb(blowfish)",
+	.cra_driver_name	= "ecb-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+};
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 *iv = (u64 *)walk->iv;
+
+	do {
+		*dst = *src ^ *iv;
+		blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	*(u64 *)walk->iv = *iv;
+	return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ivs[4 - 1];
+	u64 last_iv;
+
+	/* Point at the last whole block; blocks are handled back to front. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;	/* saved now, written to walk->iv at done: */
+
+	/* Four-block batches; ivs[] keeps ciphertext for the XORs below */
+	if (nbytes >= bsize * 4) {
+		do {
+			nbytes -= bsize * 4 - bsize;
+			src -= 4 - 1;
+			dst -= 4 - 1;
+
+			ivs[0] = src[0];
+			ivs[1] = src[1];
+			ivs[2] = src[2];
+
+			blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
+
+			dst[1] ^= ivs[0];
+			dst[2] ^= ivs[1];
+			dst[3] ^= ivs[2];
+
+			nbytes -= bsize;
+			if (nbytes < bsize)
+				goto done;
+
+			*dst ^= *(src - 1);	/* chain with preceding block */
+			src -= 1;
+			dst -= 1;
+		} while (nbytes >= bsize * 4);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Remaining whole blocks, one at a time, still back to front */
+	for (;;) {
+		blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
+
+		nbytes -= bsize;
+		if (nbytes < bsize)
+			break;
+
+		*dst ^= *(src - 1);	/* chain with preceding block */
+		src -= 1;
+		dst -= 1;
+	}
+
+done:
+	*dst ^= *(u64 *)walk->iv;	/* earliest block uses the incoming IV */
+	*(u64 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+/*
+ * blkcipher .decrypt handler for "cbc(blowfish)".
+ *
+ * Walks the src/dst scatterlists and lets __cbc_decrypt() process each
+ * mapped segment; the byte count it reports as unprocessed is handed to
+ * blkcipher_walk_done().  Returns the status of the last walk call.
+ */
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	for (err = blkcipher_walk_virt(desc, &walk); walk.nbytes; ) {
+		unsigned int tail = __cbc_decrypt(desc, &walk);
+
+		err = blkcipher_walk_done(desc, &walk, tail);
+	}
+
+	return err;
+}
+
+/*
+ * Descriptor for the "cbc(blowfish)" blkcipher provided by this module
+ * (driver name "cbc-blowfish-asm").  Requests are served by cbc_encrypt()
+ * and cbc_decrypt(); keys are installed with blowfish_setkey().  The IV
+ * is one cipher block (BF_BLOCK_SIZE bytes).
+ */
+static struct crypto_alg blk_cbc_alg = {
+	.cra_name		= "cbc(blowfish)",
+	.cra_driver_name	= "cbc-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.ivsize		= BF_BLOCK_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+};
+
+/*
+ * Process the trailing partial block of a CTR request.
+ *
+ * The counter block in walk->iv is encrypted into a temporary keystream
+ * buffer, of which only the first walk->nbytes bytes are combined with
+ * the source data and copied to the destination.  The counter is then
+ * incremented.
+ */
+static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
+{
+	u8 pad[BF_BLOCK_SIZE];
+	unsigned int tail = walk->nbytes;
+
+	/* One block of keystream from the current counter value. */
+	blowfish_enc_blk(ctx, pad, walk->iv);
+
+	/* XOR in the input and emit just the bytes that exist. */
+	crypto_xor(pad, walk->src.virt.addr, tail);
+	memcpy(walk->dst.virt.addr, pad, tail);
+
+	crypto_inc(walk->iv, BF_BLOCK_SIZE);
+}
+
+/*
+ * CTR-process the whole blocks of the current walk segment.
+ *
+ * walk->iv is read as one 64-bit big-endian counter and kept in CPU byte
+ * order in ctrblk while blocks are processed; the advanced counter is
+ * written back to walk->iv in big-endian form before returning.
+ *
+ * Returns the number of bytes left over after the whole blocks.
+ */
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
+	__be64 ctrblocks[4];
+
+	/* Process four block batch */
+	if (nbytes >= bsize * 4) {
+		do {
+			/*
+			 * The _xor helpers XOR the encrypted counter blocks
+			 * into dst, so dst must already hold the input data
+			 * when the operation is not done in place.
+			 */
+			if (dst != src) {
+				dst[0] = src[0];
+				dst[1] = src[1];
+				dst[2] = src[2];
+				dst[3] = src[3];
+			}
+
+			/* create ctrblks for parallel encrypt */
+			ctrblocks[0] = cpu_to_be64(ctrblk++);
+			ctrblocks[1] = cpu_to_be64(ctrblk++);
+			ctrblocks[2] = cpu_to_be64(ctrblk++);
+			ctrblocks[3] = cpu_to_be64(ctrblk++);
+
+			blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
+						  (u8 *)ctrblocks);
+
+			src += 4;
+			dst += 4;
+		} while ((nbytes -= bsize * 4) >= bsize * 4);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	do {
+		/* Same copy-then-XOR scheme as above, one block at a time. */
+		if (dst != src)
+			*dst = *src;
+
+		ctrblocks[0] = cpu_to_be64(ctrblk++);
+
+		blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
+
+		src += 1;
+		dst += 1;
+	} while ((nbytes -= bsize) >= bsize);
+
+done:
+	/* Publish the advanced counter for the next segment / final block. */
+	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
+	return nbytes;
+}
+
+/*
+ * blkcipher .encrypt/.decrypt handler for "ctr(blowfish)".
+ *
+ * Segments holding at least one whole block are processed by
+ * __ctr_crypt(); if the walk ends with a shorter remainder, it is
+ * handled by ctr_crypt_final().  Returns the status of the last walk
+ * call.
+ */
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
+
+	while (walk.nbytes >= BF_BLOCK_SIZE) {
+		unsigned int tail = __ctr_crypt(desc, &walk);
+
+		err = blkcipher_walk_done(desc, &walk, tail);
+	}
+
+	/* Nothing shorter than a block left over: done. */
+	if (!walk.nbytes)
+		return err;
+
+	ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
+	return blkcipher_walk_done(desc, &walk, 0);
+}
+
+/*
+ * Descriptor for the "ctr(blowfish)" blkcipher provided by this module
+ * (driver name "ctr-blowfish-asm").  ctr_crypt() serves both directions;
+ * keys are installed with blowfish_setkey() and the IV/counter is one
+ * cipher block (BF_BLOCK_SIZE bytes).
+ *
+ * CTR turns the block cipher into a stream cipher: ctr_crypt() handles a
+ * trailing partial block through ctr_crypt_final(), so the advertised
+ * block size is 1 (as for the generic ctr template) rather than
+ * BF_BLOCK_SIZE, which would not admit request lengths that are not a
+ * multiple of the cipher block.
+ */
+static struct crypto_alg blk_ctr_alg = {
+	.cra_name		= "ctr(blowfish)",
+	.cra_driver_name	= "ctr-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.ivsize		= BF_BLOCK_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+};
+
+/*
+ * Module init: register the plain cipher and the ecb/cbc/ctr blkcipher
+ * algorithms, in that order.  If a registration fails, the algorithms
+ * registered so far are unregistered in reverse order and the error is
+ * returned.
+ */
+static int __init init(void)
+{
+	int err;
+
+	err = crypto_register_alg(&bf_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_alg(&blk_ecb_alg);
+	if (err)
+		goto drop_bf;
+
+	err = crypto_register_alg(&blk_cbc_alg);
+	if (err)
+		goto drop_ecb;
+
+	err = crypto_register_alg(&blk_ctr_alg);
+	if (!err)
+		return 0;
+
+	/* ctr failed: unwind everything registered before it. */
+	crypto_unregister_alg(&blk_cbc_alg);
+drop_ecb:
+	crypto_unregister_alg(&blk_ecb_alg);
+drop_bf:
+	crypto_unregister_alg(&bf_alg);
+	return err;
+}
+
+/*
+ * Module exit: unregister the four algorithms in the reverse of the
+ * order in which init() registered them.
+ */
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&blk_ctr_alg);
+	crypto_unregister_alg(&blk_cbc_alg);
+	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_alg(&bf_alg);
+}
+
+/* Module entry/exit hooks, licence, description and alias names. */
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
+MODULE_ALIAS("blowfish");
+MODULE_ALIAS("blowfish-asm");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 108cb98..0763774 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -620,6 +620,21 @@ config CRYPTO_BLOWFISH_COMMON
 	  See also:
 	  <http://www.schneier.com/blowfish.html>
 
+config CRYPTO_BLOWFISH_X86_64
+	tristate "Blowfish cipher algorithm (x86_64)"
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLOWFISH_COMMON
+	help
+	  Blowfish cipher algorithm (x86_64), by Bruce Schneier.
+
+	  This is a variable key length cipher which can use keys from 32
+	  bits to 448 bits in length.  It's fast, simple and specifically
+	  designed for use on "large microprocessors".
+
+	  See also:
+	  <http://www.schneier.com/blowfish.html>
+
 config CRYPTO_CAMELLIA
 	tristate "Camellia cipher algorithms"
 	depends on CRYPTO

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation
  2011-09-01 22:45 ` [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation Jussi Kivilinna
@ 2011-09-19 11:19   ` Jussi Kivilinna
  2011-09-19 11:25     ` Herbert Xu
  0 siblings, 1 reply; 8+ messages in thread
From: Jussi Kivilinna @ 2011-09-19 11:19 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: linux-crypto, Herbert Xu, David S. Miller

Hello,

Is it just me but I notice there is very little activity going on  
linux-crypto mailing-list? No replies on patches etc. I cannot be sure  
as I'm new to this mailing-list.

So anyway, is this patch something that could be accepted? I have
further improved the 4-way parallel functions and now get a 2.5x speedup
on ECB (1.3x the speed of 128-bit AES asm). I also have patches for
3-way parallel twofish that give a 1.3x speedup in ECB (1.0x the speed
of 128-bit AES).

These patches have been tested with automated loop-device/dm-crypt
filesystem tests (cross-checking encrypted images in ecb/cbc/ctr modes
between patched and unpatched kernels).

-Jussi

Quoting Jussi Kivilinna <jussi.kivilinna@mbnet.fi>:

> Patch adds x86_64 assembly implementation of blowfish. Two set of assembler
> functions are provided. First set is regular 'one-block at time'
> encrypt/decrypt functions. Second is 'four-block at time' functions that
> gain performance increase on out-of-order CPUs. Performance of 4-way
> functions should be equal to 1-way functions with in-order CPUs.
>
> Summary of the tcrypt benchmarks:
>
> Blowfish assembler vs blowfish C (256bit 8kb block ECB)
> encrypt: 2.2x speed
> decrypt: 2.3x speed
>
> Blowfish assembler vs blowfish C (256bit 8kb block CBC)
> encrypt: 1.12x speed
> decrypt: 2.5x speed
>
> Blowfish assembler vs blowfish C (256bit 8kb block CTR)
> encrypt: 2.5x speed
>
> Full output:
> http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-blowfish-asm-x86_64.txt
> http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-blowfish-c-x86_64.txt
>
> Tests were run on:
>  vendor_id	: AuthenticAMD
>  cpu family	: 16
>  model		: 10
>  model name	: AMD Phenom(tm) II X6 1055T Processor
>  stepping	: 0
>
> Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
> ---
>  arch/x86/crypto/Makefile                 |    2
>  arch/x86/crypto/blowfish-x86_64-asm_64.S |  392 ++++++++++++++++++++++++
>  arch/x86/crypto/blowfish_glue.c          |  487  
> ++++++++++++++++++++++++++++++
>  crypto/Kconfig                           |   15 +
>  4 files changed, 896 insertions(+), 0 deletions(-)
>  create mode 100644 arch/x86/crypto/blowfish-x86_64-asm_64.S
>  create mode 100644 arch/x86/crypto/blowfish_glue.c
>
> diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> index 57c7f7b..725addf 100644
> --- a/arch/x86/crypto/Makefile
> +++ b/arch/x86/crypto/Makefile
> @@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
>  obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
>
>  obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
> +obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
>  obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
>  obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
>  obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
> @@ -20,6 +21,7 @@ twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
>  salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
>
>  aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
> +blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
>  twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
>  salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
>
> diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S  
> b/arch/x86/crypto/blowfish-x86_64-asm_64.S
> new file mode 100644
> index 0000000..44eb23a
> --- /dev/null
> +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
> @@ -0,0 +1,392 @@
> +/*
> + * Blowfish Cipher Algorithm (x86_64)
> + *
> + * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
> + * USA
> + *
> + */
> +
> +.file "blowfish-x86_64-asm.S"
> +.text
> +
> +/* structure of crypto context */
> +#define p	0
> +#define s0	((16 + 2) * 4)
> +#define s1	((16 + 2 + (1 * 256)) * 4)
> +#define s2	((16 + 2 + (2 * 256)) * 4)
> +#define s3	((16 + 2 + (3 * 256)) * 4)
> +
> +/* register macros */
> +#define CTX %rdi
> +#define RIO %rsi
> +
> +#define RX0 %rax
> +#define RX1 %rbx
> +#define RX2 %rcx
> +#define RX3 %rdx
> +
> +#define RX0d %eax
> +#define RX1d %ebx
> +#define RX2d %ecx
> +#define RX3d %edx
> +
> +#define RX0bl %al
> +#define RX1bl %bl
> +#define RX2bl %cl
> +#define RX3bl %dl
> +
> +#define RX0bh %ah
> +#define RX1bh %bh
> +#define RX2bh %ch
> +#define RX3bh %dh
> +
> +#define RT0 %rbp
> +#define RT1 %rsi
> +
> +#define RT0d %ebp
> +#define RT1d %esi
> +
> +#define RK0 %r8
> +#define RK1 %r9
> +#define RK2 %r10
> +#define RK3 %r11
> +
> +#define RK0d %r8d
> +#define RK1d %r9d
> +#define RK2d %r10d
> +#define RK3d %r11d
> +
> +#define RKEY %r12
> +
> +/***********************************************************************
> + * 1-way blowfish
> + ***********************************************************************/
> +#define F(x, k) \
> +	rorq $16,		x; \
> +	movzbl x ## bh,		RT0d; \
> +	movzbl x ## bl,		RT1d; \
> +	rolq $16,		x; \
> +	movl s0(CTX,RT0,4),	k ## d; \
> +	addl s1(CTX,RT1,4),	k ## d; \
> +	movzbl x ## bh,		RT0d; \
> +	movzbl x ## bl,		RT1d; \
> +	rolq $32,		x; \
> +	xorl s2(CTX,RT0,4),	k ## d; \
> +	addl s3(CTX,RT1,4),	k ## d; \
> +	xorq k,			x;
> +
> +#define add_roundkey_enc(n) \
> +	xorq p+4*(n)(CTX), 	RX0;
> +
> +#define round_enc(n) \
> +	add_roundkey_enc(n); \
> +	\
> +	F(RX0, RK0); \
> +	F(RX0, RK0);
> +
> +#define round_final_enc(n) \
> +	xorq p+4*(n)(CTX), 	RX0;
> +
> +#define add_roundkey_dec(n) \
> +	movq p+4*(n-1)(CTX),	RT0; \
> +	rorq $32,		RT0; \
> +	xorq RT0,		RX0;
> +
> +#define round_dec(n) \
> +	add_roundkey_dec(n); \
> +	\
> +	F(RX0, RK0); \
> +	F(RX0, RK0); \
> +
> +#define read_block() \
> +	movq (RIO), 		RX0; \
> +	rorq $32, 		RX0; \
> +	bswapq 			RX0;
> +
> +#define write_block() \
> +	bswapq 			RX0; \
> +	movq RX0, 		(RIO);
> +
> +#define xor_block() \
> +	bswapq 			RX0; \
> +	xorq RX0, 		(RIO);
> +
> +.align 8
> +.global __blowfish_enc_blk
> +.type   __blowfish_enc_blk,@function;
> +
> +__blowfish_enc_blk:
> +	// input:
> +	//	%rdi: ctx, CTX
> +	//	%rsi: dst
> +	//	%rdx: src
> +	//	%rcx: bool xor
> +	pushq %rbp;
> +	pushq %rbx;
> +
> +	pushq %rsi;
> +	pushq %rcx;
> +	movq %rdx, RIO;
> +
> +	read_block();
> +
> +	round_enc(0);
> +	round_enc(2);
> +	round_enc(4);
> +	round_enc(6);
> +	round_enc(8);
> +	round_enc(10);
> +	round_enc(12);
> +	round_enc(14);
> +	add_roundkey_enc(16);
> +
> +	popq %rbp;
> +	popq RIO;
> +
> +	test %bpl, %bpl;
> +	jnz __enc_xor;
> +
> +	write_block();
> +
> +__enc_ret:
> +	popq %rbx;
> +	popq %rbp;
> +
> +	ret;
> +
> +__enc_xor:
> +	xor_block();
> +
> +	jmp __enc_ret;
> +
> +.align 8
> +.global blowfish_dec_blk
> +.type   blowfish_dec_blk,@function;
> +
> +blowfish_dec_blk:
> +	// input:
> +	//	%rdi: ctx, CTX
> +	//	%rsi: dst
> +	//	%rdx: src
> +	pushq %rbp;
> +	pushq %rbx;
> +
> +	pushq %rsi;
> +	movq %rdx, RIO;
> +
> +	read_block();
> +
> +	round_dec(17);
> +	round_dec(15);
> +	round_dec(13);
> +	round_dec(11);
> +	round_dec(9);
> +	round_dec(7);
> +	round_dec(5);
> +	round_dec(3);
> +	add_roundkey_dec(1);
> +
> +	popq RIO;
> +	write_block();
> +
> +	popq %rbx;
> +	popq %rbp;
> +
> +	ret;
> +
> +/**********************************************************************
> +  4-way blowfish, four blocks parallel
> + **********************************************************************/
> +#define add_preloaded_roundkey4() \
> +	xorq RKEY,		RX0; \
> +	xorq RKEY,		RX1; \
> +	xorq RKEY,		RX2; \
> +	xorq RKEY,		RX3;
> +
> +#define preload_roundkey_enc(n) \
> +	movq p+4*(n)(CTX),	RKEY;
> +
> +#define add_roundkey_enc4(n) \
> +	add_preloaded_roundkey4(); \
> +	preload_roundkey_enc(n + 2);
> +
> +#define round_enc4(n) \
> +	add_roundkey_enc4(n); \
> +	\
> +	F(RX0, RK0); \
> +	F(RX1, RK1); \
> +	F(RX2, RK2); \
> +	F(RX3, RK3); \
> +	\
> +	F(RX0, RK0); \
> +	F(RX1, RK1); \
> +	F(RX2, RK2); \
> +	F(RX3, RK3);
> +
> +#define preload_roundkey_dec(n) \
> +	movq p+4*((n)-1)(CTX),	RKEY; \
> +	rorq $32,		RKEY;
> +
> +#define add_roundkey_dec4(n) \
> +	add_preloaded_roundkey4(); \
> +	preload_roundkey_dec(n - 2);
> +
> +#define round_dec4(n) \
> +	add_roundkey_dec4(n); \
> +	\
> +	F(RX0, RK0); \
> +	F(RX1, RK1); \
> +	F(RX2, RK2); \
> +	F(RX3, RK3); \
> +	\
> +	F(RX0, RK0); \
> +	F(RX1, RK1); \
> +	F(RX2, RK2); \
> +	F(RX3, RK3);
> +
> +#define read_block4() \
> +	movq (RIO),		RX0; \
> +	rorq $32,		RX0; \
> +	bswapq 			RX0; \
> +	\
> +	movq 8(RIO),		RX1; \
> +	rorq $32,		RX1; \
> +	bswapq 			RX1; \
> +	\
> +	movq 16(RIO),		RX2; \
> +	rorq $32,		RX2; \
> +	bswapq 			RX2; \
> +	\
> +	movq 24(RIO),		RX3; \
> +	rorq $32,		RX3; \
> +	bswapq 			RX3;
> +
> +#define write_block4() \
> +	bswapq 			RX0; \
> +	movq RX0,		(RIO); \
> +	\
> +	bswapq 			RX1; \
> +	movq RX1,		8(RIO); \
> +	\
> +	bswapq 			RX2; \
> +	movq RX2,		16(RIO); \
> +	\
> +	bswapq 			RX3; \
> +	movq RX3,		24(RIO);
> +
> +#define xor_block4() \
> +	bswapq 			RX0; \
> +	xorq RX0,		(RIO); \
> +	\
> +	bswapq 			RX1; \
> +	xorq RX1,		8(RIO); \
> +	\
> +	bswapq 			RX2; \
> +	xorq RX2,		16(RIO); \
> +	\
> +	bswapq 			RX3; \
> +	xorq RX3,		24(RIO);
> +
> +.align 8
> +.global __blowfish_enc_blk_4way
> +.type   __blowfish_enc_blk_4way,@function;
> +
> +__blowfish_enc_blk_4way:
> +	// input:
> +	//	%rdi: ctx, CTX
> +	//	%rsi: dst
> +	//	%rdx: src
> +	//	%rcx: bool xor
> +	pushq %rbp;
> +	pushq %rbx;
> +	pushq RKEY;
> +	preload_roundkey_enc(0);
> +
> +	pushq %rsi;
> +	pushq %rcx;
> +	movq %rdx, RIO;
> +
> +	read_block4();
> +
> +	round_enc4(0);
> +	round_enc4(2);
> +	round_enc4(4);
> +	round_enc4(6);
> +	round_enc4(8);
> +	round_enc4(10);
> +	round_enc4(12);
> +	round_enc4(14);
> +	add_preloaded_roundkey4();
> +
> +	popq %rbp;
> +	popq RIO;
> +
> +	test %bpl, %bpl;
> +	jnz __enc_xor4;
> +
> +	write_block4();
> +
> +__enc_ret4:
> +	popq RKEY;
> +	popq %rbx;
> +	popq %rbp;
> +
> +	ret;
> +
> +__enc_xor4:
> +	xor_block4();
> +
> +	jmp __enc_ret4;
> +
> +.align 8
> +.global blowfish_dec_blk_4way
> +.type   blowfish_dec_blk_4way,@function;
> +
> +blowfish_dec_blk_4way:
> +	// input:
> +	//	%rdi: ctx, CTX
> +	//	%rsi: dst
> +	//	%rdx: src
> +	pushq %rbp;
> +	pushq %rbx;
> +	pushq RKEY;
> +	preload_roundkey_dec(17);
> +
> +	pushq %rsi;
> +	movq %rdx, RIO;
> +
> +	read_block4();
> +
> +	round_dec4(17);
> +	round_dec4(15);
> +	round_dec4(13);
> +	round_dec4(11);
> +	round_dec4(9);
> +	round_dec4(7);
> +	round_dec4(5);
> +	round_dec4(3);
> +	add_preloaded_roundkey4();
> +
> +	popq RIO;
> +	write_block4();
> +
> +	popq RKEY;
> +	popq %rbx;
> +	popq %rbp;
> +
> +	ret;
> +
> diff --git a/arch/x86/crypto/blowfish_glue.c  
> b/arch/x86/crypto/blowfish_glue.c
> new file mode 100644
> index 0000000..40911ab
> --- /dev/null
> +++ b/arch/x86/crypto/blowfish_glue.c
> @@ -0,0 +1,487 @@
> +/*
> + * Glue Code for assembler optimized version of Blowfish
> + *
> + * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
> + * USA
> + *
> + */
> +
> +#include <crypto/blowfish.h>
> +#include <linux/crypto.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <crypto/algapi.h>
> +
> +/* regular block cipher functions */
> +asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst,  
> const u8 *src,
> +				   bool xor);
> +asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const  
> u8 *src);
> +
> +/* 4-way parallel cipher functions */
> +asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
> +					const u8 *src, bool xor);
> +asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
> +				      const u8 *src);
> +
> +static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst,  
> const u8 *src)
> +{
> +	__blowfish_enc_blk(ctx, dst, src, false);
> +}
> +
> +static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
> +					const u8 *src)
> +{
> +	__blowfish_enc_blk(ctx, dst, src, true);
> +}
> +
> +static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
> +					 const u8 *src)
> +{
> +	__blowfish_enc_blk_4way(ctx, dst, src, false);
> +}
> +
> +static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
> +				      const u8 *src)
> +{
> +	__blowfish_enc_blk_4way(ctx, dst, src, true);
> +}
> +
> +static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
> +{
> +	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
> +}
> +
> +static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
> +{
> +	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
> +}
> +
> +static struct crypto_alg bf_alg = {
> +	.cra_name		=	"blowfish",
> +	.cra_driver_name	=	"blowfish-asm",
> +	.cra_priority		=	200,
> +	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
> +	.cra_blocksize		=	BF_BLOCK_SIZE,
> +	.cra_ctxsize		=	sizeof(struct bf_ctx),
> +	.cra_alignmask		=	3,
> +	.cra_module		=	THIS_MODULE,
> +	.cra_list		=	LIST_HEAD_INIT(bf_alg.cra_list),
> +	.cra_u			=	{
> +		.cipher = {
> +			.cia_min_keysize	=	BF_MIN_KEY_SIZE,
> +			.cia_max_keysize	=	BF_MAX_KEY_SIZE,
> +			.cia_setkey		=	blowfish_setkey,
> +			.cia_encrypt		=	blowfish_encrypt,
> +			.cia_decrypt		=	blowfish_decrypt,
> +		}
> +	}
> +};
> +
> +static int ecb_crypt(struct blkcipher_desc *desc, struct  
> blkcipher_walk *walk,
> +		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
> +		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
> +{
> +	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
> +	unsigned int bsize = BF_BLOCK_SIZE;
> +	unsigned int nbytes;
> +	int err;
> +
> +	err = blkcipher_walk_virt(desc, walk);
> +
> +	while ((nbytes = walk->nbytes)) {
> +		u8 *wsrc = walk->src.virt.addr;
> +		u8 *wdst = walk->dst.virt.addr;
> +
> +		/* Process four block batch */
> +		if (nbytes >= bsize * 4) {
> +			do {
> +				fn_4way(ctx, wdst, wsrc);
> +
> +				wsrc += bsize * 4;
> +				wdst += bsize * 4;
> +				nbytes -= bsize * 4;
> +			} while (nbytes >= bsize * 4);
> +
> +			if (nbytes < bsize)
> +				goto done;
> +		}
> +
> +		/* Handle leftovers */
> +		do {
> +			fn(ctx, wdst, wsrc);
> +
> +			wsrc += bsize;
> +			wdst += bsize;
> +			nbytes -= bsize;
> +		} while (nbytes >= bsize);
> +
> +done:
> +		err = blkcipher_walk_done(desc, walk, nbytes);
> +	}
> +
> +	return err;
> +}
> +
> +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
> +		       struct scatterlist *src, unsigned int nbytes)
> +{
> +	struct blkcipher_walk walk;
> +
> +	blkcipher_walk_init(&walk, dst, src, nbytes);
> +	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
> +}
> +
> +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
> +		       struct scatterlist *src, unsigned int nbytes)
> +{
> +	struct blkcipher_walk walk;
> +
> +	blkcipher_walk_init(&walk, dst, src, nbytes);
> +	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
> +}
> +
> +static struct crypto_alg blk_ecb_alg = {
> +	.cra_name		= "ecb(blowfish)",
> +	.cra_driver_name	= "ecb-blowfish-asm",
> +	.cra_priority		= 300,
> +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
> +	.cra_blocksize		= BF_BLOCK_SIZE,
> +	.cra_ctxsize		= sizeof(struct bf_ctx),
> +	.cra_alignmask		= 0,
> +	.cra_type		= &crypto_blkcipher_type,
> +	.cra_module		= THIS_MODULE,
> +	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
> +	.cra_u = {
> +		.blkcipher = {
> +			.min_keysize	= BF_MIN_KEY_SIZE,
> +			.max_keysize	= BF_MAX_KEY_SIZE,
> +			.setkey		= blowfish_setkey,
> +			.encrypt	= ecb_encrypt,
> +			.decrypt	= ecb_decrypt,
> +		},
> +	},
> +};
> +
> +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
> +				  struct blkcipher_walk *walk)
> +{
> +	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
> +	unsigned int bsize = BF_BLOCK_SIZE;
> +	unsigned int nbytes = walk->nbytes;
> +	u64 *src = (u64 *)walk->src.virt.addr;
> +	u64 *dst = (u64 *)walk->dst.virt.addr;
> +	u64 *iv = (u64 *)walk->iv;
> +
> +	do {
> +		*dst = *src ^ *iv;
> +		blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
> +		iv = dst;
> +
> +		src += 1;
> +		dst += 1;
> +		nbytes -= bsize;
> +	} while (nbytes >= bsize);
> +
> +	*(u64 *)walk->iv = *iv;
> +	return nbytes;
> +}
> +
> +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
> +		       struct scatterlist *src, unsigned int nbytes)
> +{
> +	struct blkcipher_walk walk;
> +	int err;
> +
> +	blkcipher_walk_init(&walk, dst, src, nbytes);
> +	err = blkcipher_walk_virt(desc, &walk);
> +
> +	while ((nbytes = walk.nbytes)) {
> +		nbytes = __cbc_encrypt(desc, &walk);
> +		err = blkcipher_walk_done(desc, &walk, nbytes);
> +	}
> +
> +	return err;
> +}
> +
> +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
> +				  struct blkcipher_walk *walk)
> +{
> +	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
> +	unsigned int bsize = BF_BLOCK_SIZE;
> +	unsigned int nbytes = walk->nbytes;
> +	u64 *src = (u64 *)walk->src.virt.addr;
> +	u64 *dst = (u64 *)walk->dst.virt.addr;
> +	u64 ivs[4 - 1];
> +	u64 last_iv;
> +
> +	/* Start of the last block. */
> +	src += nbytes / bsize - 1;
> +	dst += nbytes / bsize - 1;
> +
> +	last_iv = *src;
> +
> +	/* Process four block batch */
> +	if (nbytes >= bsize * 4) {
> +		do {
> +			nbytes -= bsize * 4 - bsize;
> +			src -= 4 - 1;
> +			dst -= 4 - 1;
> +
> +			ivs[0] = src[0];
> +			ivs[1] = src[1];
> +			ivs[2] = src[2];
> +
> +			blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
> +
> +			dst[1] ^= ivs[0];
> +			dst[2] ^= ivs[1];
> +			dst[3] ^= ivs[2];
> +
> +			nbytes -= bsize;
> +			if (nbytes < bsize)
> +				goto done;
> +
> +			*dst ^= *(src - 1);
> +			src -= 1;
> +			dst -= 1;
> +		} while (nbytes >= bsize * 4);
> +
> +		if (nbytes < bsize)
> +			goto done;
> +	}
> +
> +	/* Handle leftovers */
> +	for (;;) {
> +		blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
> +
> +		nbytes -= bsize;
> +		if (nbytes < bsize)
> +			break;
> +
> +		*dst ^= *(src - 1);
> +		src -= 1;
> +		dst -= 1;
> +	}
> +
> +done:
> +	*dst ^= *(u64 *)walk->iv;
> +	*(u64 *)walk->iv = last_iv;
> +
> +	return nbytes;
> +}
> +
> +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
> +		       struct scatterlist *src, unsigned int nbytes)
> +{
> +	struct blkcipher_walk walk;
> +	int err;
> +
> +	blkcipher_walk_init(&walk, dst, src, nbytes);
> +	err = blkcipher_walk_virt(desc, &walk);
> +
> +	while ((nbytes = walk.nbytes)) {
> +		nbytes = __cbc_decrypt(desc, &walk);
> +		err = blkcipher_walk_done(desc, &walk, nbytes);
> +	}
> +
> +	return err;
> +}
> +
> +static struct crypto_alg blk_cbc_alg = {
> +	.cra_name		= "cbc(blowfish)",
> +	.cra_driver_name	= "cbc-blowfish-asm",
> +	.cra_priority		= 300,
> +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
> +	.cra_blocksize		= BF_BLOCK_SIZE,
> +	.cra_ctxsize		= sizeof(struct bf_ctx),
> +	.cra_alignmask		= 0,
> +	.cra_type		= &crypto_blkcipher_type,
> +	.cra_module		= THIS_MODULE,
> +	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
> +	.cra_u = {
> +		.blkcipher = {
> +			.min_keysize	= BF_MIN_KEY_SIZE,
> +			.max_keysize	= BF_MAX_KEY_SIZE,
> +			.ivsize		= BF_BLOCK_SIZE,
> +			.setkey		= blowfish_setkey,
> +			.encrypt	= cbc_encrypt,
> +			.decrypt	= cbc_decrypt,
> +		},
> +	},
> +};
> +
> +static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
> +{
> +	u8 *ctrblk = walk->iv;
> +	u8 keystream[BF_BLOCK_SIZE];
> +	u8 *src = walk->src.virt.addr;
> +	u8 *dst = walk->dst.virt.addr;
> +	unsigned int nbytes = walk->nbytes;
> +
> +	blowfish_enc_blk(ctx, keystream, ctrblk);
> +	crypto_xor(keystream, src, nbytes);
> +	memcpy(dst, keystream, nbytes);
> +
> +	crypto_inc(ctrblk, BF_BLOCK_SIZE);
> +}
> +
> +static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
> +				struct blkcipher_walk *walk)
> +{
> +	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
> +	unsigned int bsize = BF_BLOCK_SIZE;
> +	unsigned int nbytes = walk->nbytes;
> +	u64 *src = (u64 *)walk->src.virt.addr;
> +	u64 *dst = (u64 *)walk->dst.virt.addr;
> +	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
> +	__be64 ctrblocks[4];
> +
> +	/* Process four block batch */
> +	if (nbytes >= bsize * 4) {
> +		do {
> +			if (dst != src) {
> +				dst[0] = src[0];
> +				dst[1] = src[1];
> +				dst[2] = src[2];
> +				dst[3] = src[3];
> +			}
> +
> +			/* create ctrblks for parallel encrypt */
> +			ctrblocks[0] = cpu_to_be64(ctrblk++);
> +			ctrblocks[1] = cpu_to_be64(ctrblk++);
> +			ctrblocks[2] = cpu_to_be64(ctrblk++);
> +			ctrblocks[3] = cpu_to_be64(ctrblk++);
> +
> +			blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
> +						  (u8 *)ctrblocks);
> +
> +			src += 4;
> +			dst += 4;
> +		} while ((nbytes -= bsize * 4) >= bsize * 4);
> +
> +		if (nbytes < bsize)
> +			goto done;
> +	}
> +
> +	/* Handle leftovers */
> +	do {
> +		if (dst != src)
> +			*dst = *src;
> +
> +		ctrblocks[0] = cpu_to_be64(ctrblk++);
> +
> +		blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
> +
> +		src += 1;
> +		dst += 1;
> +	} while ((nbytes -= bsize) >= bsize);
> +
> +done:
> +	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
> +	return nbytes;
> +}
> +
> +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
> +		     struct scatterlist *src, unsigned int nbytes)
> +{
> +	struct blkcipher_walk walk;
> +	int err;
> +
> +	blkcipher_walk_init(&walk, dst, src, nbytes);
> +	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
> +
> +	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
> +		nbytes = __ctr_crypt(desc, &walk);
> +		err = blkcipher_walk_done(desc, &walk, nbytes);
> +	}
> +
> +	if (walk.nbytes) {
> +		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
> +		err = blkcipher_walk_done(desc, &walk, 0);
> +	}
> +
> +	return err;
> +}
> +
> +static struct crypto_alg blk_ctr_alg = {
> +	.cra_name		= "ctr(blowfish)",
> +	.cra_driver_name	= "ctr-blowfish-asm",
> +	.cra_priority		= 300,
> +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
> +	.cra_blocksize		= BF_BLOCK_SIZE,
> +	.cra_ctxsize		= sizeof(struct bf_ctx),
> +	.cra_alignmask		= 0,
> +	.cra_type		= &crypto_blkcipher_type,
> +	.cra_module		= THIS_MODULE,
> +	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
> +	.cra_u = {
> +		.blkcipher = {
> +			.min_keysize	= BF_MIN_KEY_SIZE,
> +			.max_keysize	= BF_MAX_KEY_SIZE,
> +			.ivsize		= BF_BLOCK_SIZE,
> +			.setkey		= blowfish_setkey,
> +			.encrypt	= ctr_crypt,
> +			.decrypt	= ctr_crypt,
> +		},
> +	},
> +};
> +
> +static int __init init(void)
> +{
> +	int err;
> +
> +	err = crypto_register_alg(&bf_alg);
> +	if (err)
> +		goto bf_err;
> +	err = crypto_register_alg(&blk_ecb_alg);
> +	if (err)
> +		goto ecb_err;
> +	err = crypto_register_alg(&blk_cbc_alg);
> +	if (err)
> +		goto cbc_err;
> +	err = crypto_register_alg(&blk_ctr_alg);
> +	if (err)
> +		goto ctr_err;
> +
> +	return 0;
> +
> +ctr_err:
> +	crypto_unregister_alg(&blk_cbc_alg);
> +cbc_err:
> +	crypto_unregister_alg(&blk_ecb_alg);
> +ecb_err:
> +	crypto_unregister_alg(&bf_alg);
> +bf_err:
> +	return err;
> +}
> +
> +static void __exit fini(void)
> +{
> +	crypto_unregister_alg(&blk_ctr_alg);
> +	crypto_unregister_alg(&blk_cbc_alg);
> +	crypto_unregister_alg(&blk_ecb_alg);
> +	crypto_unregister_alg(&bf_alg);
> +}
> +
> +module_init(init);
> +module_exit(fini);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
> +MODULE_ALIAS("blowfish");
> +MODULE_ALIAS("blowfish-asm");
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index 108cb98..0763774 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -620,6 +620,21 @@ config CRYPTO_BLOWFISH_COMMON
>  	  See also:
>  	  <http://www.schneier.com/blowfish.html>
>
> +config CRYPTO_BLOWFISH_X86_64
> +	tristate "Blowfish cipher algorithm (x86_64)"
> +	depends on (X86 || UML_X86) && 64BIT
> +	select CRYPTO_ALGAPI
> +	select CRYPTO_BLOWFISH_COMMON
> +	help
> +	  Blowfish cipher algorithm (x86_64), by Bruce Schneier.
> +
> +	  This is a variable key length cipher which can use keys from 32
> +	  bits to 448 bits in length.  It's fast, simple and specifically
> +	  designed for use on "large microprocessors".
> +
> +	  See also:
> +	  <http://www.schneier.com/blowfish.html>
> +
>  config CRYPTO_CAMELLIA
>  	tristate "Camellia cipher algorithms"
>  	depends on CRYPTO
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation
  2011-09-19 11:19   ` Jussi Kivilinna
@ 2011-09-19 11:25     ` Herbert Xu
  0 siblings, 0 replies; 8+ messages in thread
From: Herbert Xu @ 2011-09-19 11:25 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: linux-crypto, David S. Miller

On Mon, Sep 19, 2011 at 02:19:22PM +0300, Jussi Kivilinna wrote:
>
> Is it just me, or is there very little activity on the linux-crypto
> mailing list? There have been no replies to patches, etc. I cannot be
> sure, as I'm new to this mailing list.

Your patches are in my queue and will be processed.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/4] crypto: blowfish - split generic and common c code
  2011-09-01 22:45 [[RFC] PATCH 1/4] crypto: blowfish: split generic and common c code Jussi Kivilinna
                   ` (2 preceding siblings ...)
  2011-09-01 22:45 ` [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation Jussi Kivilinna
@ 2011-09-22 11:29 ` Herbert Xu
  2011-09-22 14:25   ` Jussi Kivilinna
  3 siblings, 1 reply; 8+ messages in thread
From: Herbert Xu @ 2011-09-22 11:29 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: linux-crypto, David S. Miller

On Fri, Sep 02, 2011 at 01:45:07AM +0300, Jussi Kivilinna wrote:
> Patch splits up the blowfish crypto routine into a common part (key setup)
> which will be used by blowfish crypto modules (x86_64 assembly and generic-c).
> 
> Also fixes errors/warnings reported by checkpatch.
> 
> Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>

All applied to cryptodev.

Thanks!
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/4] crypto: blowfish - split generic and common c code
  2011-09-22 11:29 ` [PATCH 1/4] crypto: blowfish - split generic and common c code Herbert Xu
@ 2011-09-22 14:25   ` Jussi Kivilinna
  0 siblings, 0 replies; 8+ messages in thread
From: Jussi Kivilinna @ 2011-09-22 14:25 UTC (permalink / raw)
  To: Herbert Xu; +Cc: linux-crypto, David S. Miller

Quoting Herbert Xu <herbert@gondor.apana.org.au>:

> On Fri, Sep 02, 2011 at 01:45:07AM +0300, Jussi Kivilinna wrote:
>> Patch splits up the blowfish crypto routine into a common part (key setup)
>> which will be used by blowfish crypto modules (x86_64 assembly and  
>> generic-c).
>>
>> Also fixes errors/warnings reported by checkpatch.
>>
>> Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
>
> All applied to cryptodev.

Ok, thanks!

I'll send further improvements/changes as new patches on top of these.

-Jussi

>
> Thanks!
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-09-22 14:25 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-01 22:45 [[RFC] PATCH 1/4] crypto: blowfish: split generic and common c code Jussi Kivilinna
2011-09-01 22:45 ` [[RFC] PATCH 2/4] crypto: blowfish: rename C-version to blowfish_generic Jussi Kivilinna
2011-09-01 22:45 ` [[RFC] PATCH 3/4] crypto: tcrypt: add ctr(blowfish) speed test Jussi Kivilinna
2011-09-01 22:45 ` [[RFC] PATCH 4/4] crypto: blowfish: add x86_64 assembly implementation Jussi Kivilinna
2011-09-19 11:19   ` Jussi Kivilinna
2011-09-19 11:25     ` Herbert Xu
2011-09-22 11:29 ` [PATCH 1/4] crypto: blowfish - split generic and common c code Herbert Xu
2011-09-22 14:25   ` Jussi Kivilinna

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.