All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
@ 2014-03-12 18:47 chandramouli narayanan
  2014-03-14  5:40 ` Marek Vasut
  0 siblings, 1 reply; 8+ messages in thread
From: chandramouli narayanan @ 2014-03-12 18:47 UTC (permalink / raw)
  To: herbert, davem, hpa
  Cc: ilya.albrekht, maxim.locktyukhin, ronen.zohar, wajdi.k.feghali,
	tim.c.chen, linux-crypto

This git patch adds the glue, build and configuration changes
to include x86_64 AVX2 optimization of SHA1 transform to
crypto support. The patch has been tested with 3.14.0-rc1
kernel.

Changes from the initial version of this patch are in
a) check for BMI2 in addition to AVX2 support since
__sha1_transform_avx2() uses rorx
b) Since the module build has dependency on 64bit, it is
redundant to check it in the code here.

On a Haswell desktop, with turbo disabled and all cpus running
at maximum frequency, tcrypt shows AVX2 performance improvement
from 3% for 256 bytes update to 16% for 1024 bytes update over
AVX implementation. 

Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 6ba54d6..61d6e28 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+ifeq ($(avx2_supported),yes)
+sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 4a11a9d..bdd6295 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -10,6 +10,7 @@
  * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
  * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
 asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
 				   unsigned int rounds);
 #endif
+#ifdef CONFIG_AS_AVX2
+#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds);
+#endif
 
 static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
 
@@ -165,6 +172,19 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
 	return 0;
 }
 
+#ifdef CONFIG_AS_AVX2
+static void __sha1_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds)
+{
+
+	/* Select the optimal transform based on data block size */
+	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(digest, data, rounds);
+	else
+		sha1_transform_avx(digest, data, rounds);
+}
+#endif
+
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_ssse3_init,
@@ -189,7 +209,11 @@ static bool __init avx_usable(void)
 {
 	u64 xcr0;
 
+#if defined(CONFIG_AS_AVX2)
+	if (!cpu_has_avx || !cpu_has_avx2 || !cpu_has_osxsave)
+#else
 	if (!cpu_has_avx || !cpu_has_osxsave)
+#endif
 		return false;
 
 	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
@@ -205,23 +229,35 @@ static bool __init avx_usable(void)
 
 static int __init sha1_ssse3_mod_init(void)
 {
+	char *algo_name;
 	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
+	if (cpu_has_ssse3) {
 		sha1_transform_asm = sha1_transform_ssse3;
+		algo_name = "SSSE3";
+	}
 
 #ifdef CONFIG_AS_AVX
 	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable())
-		sha1_transform_asm = sha1_transform_avx;
+	if (avx_usable()) {
+		if (cpu_has_avx) {
+			sha1_transform_asm = sha1_transform_avx;
+			algo_name = "AVX";
+		}
+#ifdef CONFIG_AS_AVX2
+		if (cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI2)) {
+			/* allow AVX2 to override AVX, it's a little faster */
+			sha1_transform_asm = __sha1_transform_avx2;
+			algo_name = "AVX2";
+		}
+#endif
+	}
 #endif
 
 	if (sha1_transform_asm) {
-		pr_info("Using %s optimized SHA-1 implementation\n",
-		        sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
-		                                                   : "AVX");
+		pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
 		return crypto_register_shash(&alg);
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+	pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
 
 	return -ENODEV;
 }
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 7bcb70d..ce4012a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -491,14 +491,14 @@ config CRYPTO_SHA1
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA1_SSSE3
-	tristate "SHA1 digest algorithm (SSSE3/AVX)"
+	tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)"
 	depends on X86 && 64BIT
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
 	help
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
 	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
-	  Extensions (AVX), when available.
+	  Extensions (AVX/AVX2), when available.
 
 config CRYPTO_SHA256_SSSE3
 	tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)"

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-12 18:47 [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2 chandramouli narayanan
@ 2014-03-14  5:40 ` Marek Vasut
  2014-03-17 15:53   ` chandramouli narayanan
  0 siblings, 1 reply; 8+ messages in thread
From: Marek Vasut @ 2014-03-14  5:40 UTC (permalink / raw)
  To: chandramouli narayanan
  Cc: herbert, davem, hpa, ilya.albrekht, maxim.locktyukhin,
	ronen.zohar, wajdi.k.feghali, tim.c.chen, linux-crypto

On Wednesday, March 12, 2014 at 07:47:50 PM, chandramouli narayanan wrote:
> This git patch adds the glue, build and configuration changes
> to include x86_64 AVX2 optimization of SHA1 transform to
> crypto support. The patch has been tested with 3.14.0-rc1
> kernel.
> 
> Changes from the initial version of this patch are in
> a) check for BMI2 in addition to AVX2 support since
> __sha1_transform_avx2() uses rorx
> b) Since the module build has dependency on 64bit, it is
> redundant to check it in the code here.
> 
> On a Haswell desktop, with turbo disabled and all cpus running
> at maximum frequency, tcrypt shows AVX2 performance improvement
> from 3% for 256 bytes update to 16% for 1024 bytes update over
> AVX implementation.
> 
> Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
> 
> diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> index 6ba54d6..61d6e28 100644
> --- a/arch/x86/crypto/Makefile
> +++ b/arch/x86/crypto/Makefile
> @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
> fpu.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
>  ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o
> ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o
> sha1_ssse3_glue.o
> +ifeq ($(avx2_supported),yes)
> +sha1-ssse3-y += sha1_avx2_x86_64_asm.o

Use:

sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o

And you will not need the CONFIG_AS_AVX2 ifdef in your previous patch, no ?
[...]
Best regards,
Marek Vasut

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-14  5:40 ` Marek Vasut
@ 2014-03-17 15:53   ` chandramouli narayanan
  2014-03-17 16:06     ` Marek Vasut
  0 siblings, 1 reply; 8+ messages in thread
From: chandramouli narayanan @ 2014-03-17 15:53 UTC (permalink / raw)
  To: Marek Vasut
  Cc: herbert, davem, hpa, ilya.albrekht, maxim.locktyukhin,
	ronen.zohar, wajdi.k.feghali, tim.c.chen, linux-crypto

On Fri, 2014-03-14 at 06:40 +0100, Marek Vasut wrote:
> On Wednesday, March 12, 2014 at 07:47:50 PM, chandramouli narayanan wrote:
> > This git patch adds the glue, build and configuration changes
> > to include x86_64 AVX2 optimization of SHA1 transform to
> > crypto support. The patch has been tested with 3.14.0-rc1
> > kernel.
> > 
> > Changes from the initial version of this patch are in
> > a) check for BMI2 in addition to AVX2 support since
> > __sha1_transform_avx2() uses rorx
> > b) Since the module build has dependency on 64bit, it is
> > redundant to check it in the code here.
> > 
> > On a Haswell desktop, with turbo disabled and all cpus running
> > at maximum frequency, tcrypt shows AVX2 performance improvement
> > from 3% for 256 bytes update to 16% for 1024 bytes update over
> > AVX implementation.
> > 
> > Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
> > 
> > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > index 6ba54d6..61d6e28 100644
> > --- a/arch/x86/crypto/Makefile
> > +++ b/arch/x86/crypto/Makefile
> > @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
> > fpu.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
> >  ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o
> > ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o
> > sha1_ssse3_glue.o
> > +ifeq ($(avx2_supported),yes)
> > +sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> 
> Use:
> 
> sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o
> 
> And you will not need the CONFIG_AS_AVX2 ifdef in your previous patch, no ?
> [...]
> Best regards,
> Marek Vasut
Sorry for the delayed reply. Agreed, I will fix the dependency.

thanks
- mouli

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-17 15:53   ` chandramouli narayanan
@ 2014-03-17 16:06     ` Marek Vasut
  2014-03-17 16:53       ` chandramouli narayanan
  0 siblings, 1 reply; 8+ messages in thread
From: Marek Vasut @ 2014-03-17 16:06 UTC (permalink / raw)
  To: chandramouli narayanan
  Cc: herbert, davem, hpa, ilya.albrekht, maxim.locktyukhin,
	ronen.zohar, wajdi.k.feghali, tim.c.chen, linux-crypto

On Monday, March 17, 2014 at 04:53:12 PM, chandramouli narayanan wrote:
> On Fri, 2014-03-14 at 06:40 +0100, Marek Vasut wrote:
> > On Wednesday, March 12, 2014 at 07:47:50 PM, chandramouli narayanan wrote:
> > > This git patch adds the glue, build and configuration changes
> > > to include x86_64 AVX2 optimization of SHA1 transform to
> > > crypto support. The patch has been tested with 3.14.0-rc1
> > > kernel.
> > > 
> > > Changes from the initial version of this patch are in
> > > a) check for BMI2 in addition to AVX2 support since
> > > __sha1_transform_avx2() uses rorx
> > > b) Since the module build has dependency on 64bit, it is
> > > redundant to check it in the code here.
> > > 
> > > On a Haswell desktop, with turbo disabled and all cpus running
> > > at maximum frequency, tcrypt shows AVX2 performance improvement
> > > from 3% for 256 bytes update to 16% for 1024 bytes update over
> > > AVX implementation.
> > > 
> > > Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
> > > 
> > > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > > index 6ba54d6..61d6e28 100644
> > > --- a/arch/x86/crypto/Makefile
> > > +++ b/arch/x86/crypto/Makefile
> > > @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
> > > fpu.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
> > > 
> > >  ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o
> > > 
> > > ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o
> > > sha1_ssse3_glue.o
> > > +ifeq ($(avx2_supported),yes)
> > > +sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> > 
> > Use:
> > 
> > sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o
> > 
> > And you will not need the CONFIG_AS_AVX2 ifdef in your previous patch, no
> > ? [...]
> > Best regards,
> > Marek Vasut
> 
> Sorry for the delayed reply. Agreed, I will fix the dependency.

No problem, thanks!

Best regards,
Marek Vasut

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-17 16:06     ` Marek Vasut
@ 2014-03-17 16:53       ` chandramouli narayanan
  2014-03-17 18:11         ` Marek Vasut
  2014-03-17 18:37         ` H. Peter Anvin
  0 siblings, 2 replies; 8+ messages in thread
From: chandramouli narayanan @ 2014-03-17 16:53 UTC (permalink / raw)
  To: Marek Vasut
  Cc: herbert, davem, hpa, ilya.albrekht, maxim.locktyukhin,
	ronen.zohar, wajdi.k.feghali, tim.c.chen, linux-crypto

On Mon, 2014-03-17 at 17:06 +0100, Marek Vasut wrote:
> On Monday, March 17, 2014 at 04:53:12 PM, chandramouli narayanan wrote:
> > On Fri, 2014-03-14 at 06:40 +0100, Marek Vasut wrote:
> > > On Wednesday, March 12, 2014 at 07:47:50 PM, chandramouli narayanan wrote:
> > > > This git patch adds the glue, build and configuration changes
> > > > to include x86_64 AVX2 optimization of SHA1 transform to
> > > > crypto support. The patch has been tested with 3.14.0-rc1
> > > > kernel.
> > > > 
> > > > Changes from the initial version of this patch are in
> > > > a) check for BMI2 in addition to AVX2 support since
> > > > __sha1_transform_avx2() uses rorx
> > > > b) Since the module build has dependency on 64bit, it is
> > > > redundant to check it in the code here.
> > > > 
> > > > On a Haswell desktop, with turbo disabled and all cpus running
> > > > at maximum frequency, tcrypt shows AVX2 performance improvement
> > > > from 3% for 256 bytes update to 16% for 1024 bytes update over
> > > > AVX implementation.
> > > > 
> > > > Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
> > > > 
> > > > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > > > index 6ba54d6..61d6e28 100644
> > > > --- a/arch/x86/crypto/Makefile
> > > > +++ b/arch/x86/crypto/Makefile
> > > > @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
> > > > fpu.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
> > > > 
> > > >  ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o
> > > > 
> > > > ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o
> > > > sha1_ssse3_glue.o
> > > > +ifeq ($(avx2_supported),yes)
> > > > +sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> > > 
> > > Use:
> > > 
> > > sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o
> > > 
> > > And you will not need the CONFIG_AS_AVX2 ifdef in your previous patch, no
> > > ? [...]
> > > Best regards,
> > > Marek Vasut
> > 
> > Sorry for the delayed reply. Agreed, I will fix the dependency.
> 
> No problem, thanks!
> 
> Best regards,
> Marek Vasut
On second thoughts, with sha1-ssse3-$(CONFIG_AS_AVX2) +=
sha1_avx2_x86_64_asm.o, I have build issues and sha1_transform_avx2
undefined in sha1-ssse3.ko.

I can rid #ifdef CONFIG_AS_AVX2 in patch1. The following works though:
ifeq ($(avx2_supported),yes)
sha1-ssse3-y += sha1_avx2_x86_64_asm.o
endif

thanks
- mouli

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-17 16:53       ` chandramouli narayanan
@ 2014-03-17 18:11         ` Marek Vasut
  2014-03-17 19:09           ` chandramouli narayanan
  2014-03-17 18:37         ` H. Peter Anvin
  1 sibling, 1 reply; 8+ messages in thread
From: Marek Vasut @ 2014-03-17 18:11 UTC (permalink / raw)
  To: chandramouli narayanan
  Cc: herbert, davem, hpa, ilya.albrekht, maxim.locktyukhin,
	ronen.zohar, wajdi.k.feghali, tim.c.chen, linux-crypto

On Monday, March 17, 2014 at 05:53:52 PM, chandramouli narayanan wrote:
> On Mon, 2014-03-17 at 17:06 +0100, Marek Vasut wrote:
> > On Monday, March 17, 2014 at 04:53:12 PM, chandramouli narayanan wrote:
> > > On Fri, 2014-03-14 at 06:40 +0100, Marek Vasut wrote:
> > > > On Wednesday, March 12, 2014 at 07:47:50 PM, chandramouli narayanan 
wrote:
> > > > > This git patch adds the glue, build and configuration changes
> > > > > to include x86_64 AVX2 optimization of SHA1 transform to
> > > > > crypto support. The patch has been tested with 3.14.0-rc1
> > > > > kernel.
> > > > > 
> > > > > Changes from the initial version of this patch are in
> > > > > a) check for BMI2 in addition to AVX2 support since
> > > > > __sha1_transform_avx2() uses rorx
> > > > > b) Since the module build has dependency on 64bit, it is
> > > > > redundant to check it in the code here.
> > > > > 
> > > > > On a Haswell desktop, with turbo disabled and all cpus running
> > > > > at maximum frequency, tcrypt shows AVX2 performance improvement
> > > > > from 3% for 256 bytes update to 16% for 1024 bytes update over
> > > > > AVX implementation.
> > > > > 
> > > > > Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
> > > > > 
> > > > > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > > > > index 6ba54d6..61d6e28 100644
> > > > > --- a/arch/x86/crypto/Makefile
> > > > > +++ b/arch/x86/crypto/Makefile
> > > > > @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o
> > > > > aesni-intel_glue.o fpu.o aesni-intel-$(CONFIG_64BIT) +=
> > > > > aesni-intel_avx-x86_64.o
> > > > > 
> > > > >  ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o
> > > > > 
> > > > > ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o
> > > > > sha1_ssse3_glue.o
> > > > > +ifeq ($(avx2_supported),yes)
> > > > > +sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> > > > 
> > > > Use:
> > > > 
> > > > sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o
> > > > 
> > > > And you will not need the CONFIG_AS_AVX2 ifdef in your previous
> > > > patch, no ? [...]
> > > > Best regards,
> > > > Marek Vasut
> > > 
> > > Sorry for the delayed reply. Agreed, I will fix the dependency.
> > 
> > No problem, thanks!
> > 
> > Best regards,
> > Marek Vasut
> 
> On second thoughts, with sha1-sse3-(CONFIG_AS_AVX2) +=
> sha1_avx2_x86_64_asm.o, I have build issues and sha1_transform_avx2
> undefined in sha1-sss3.ko.
> 
> I can rid #ifdef CONFIG_AS_AVX2 in patch1. The following works though:
> ifeq ($(avx2_supported),yes)
> sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> endif

Looking throughout the arch/x86/crypto/Makefile , this sha1-ssse3.o thing is a 
bit odd I think. Why exactly does this not follow suit with the camellia or 
serpent ciphers ? I mean, look at their build rules, they handle all of 
SSE2/AVX/AVX2 implementation and it's build. Can we not clean up the SHA1-SSSE3 
to do exactly the same ? But please note I might be just plain wrong and if 
that's the case, let me know ;-)

btw. I noticed another nit in the code. You use __sha1_transform_avx2() , but 
the previous function using AVX1 is called sha1_transform_avx() . Drop those 
two underscores please for consistency's sake.

Thanks!

Best regards,
Marek Vasut

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-17 16:53       ` chandramouli narayanan
  2014-03-17 18:11         ` Marek Vasut
@ 2014-03-17 18:37         ` H. Peter Anvin
  1 sibling, 0 replies; 8+ messages in thread
From: H. Peter Anvin @ 2014-03-17 18:37 UTC (permalink / raw)
  To: chandramouli narayanan, Marek Vasut
  Cc: herbert, davem, ilya.albrekht, maxim.locktyukhin, ronen.zohar,
	wajdi.k.feghali, tim.c.chen, linux-crypto

On 03/17/2014 09:53 AM, chandramouli narayanan wrote:
> On second thoughts, with sha1-sse3-(CONFIG_AS_AVX2) +=
> sha1_avx2_x86_64_asm.o, I have build issues and sha1_transform_avx2
> undefined in sha1-sss3.ko. 
> 
> I can rid #ifdef CONFIG_AS_AVX2 in patch1. The following works though:
> ifeq ($(avx2_supported),yes)
> sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> endif

Yes, the sad thing is that the CONFIG_AS_* things aren't real config
symbols, despite the name.  They might be in the future when Kconfig can
run test probes (something we have needed for a very long time.)

The "yes" versus "y", though, is a total faceplant.

	-hpa

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2
  2014-03-17 18:11         ` Marek Vasut
@ 2014-03-17 19:09           ` chandramouli narayanan
  0 siblings, 0 replies; 8+ messages in thread
From: chandramouli narayanan @ 2014-03-17 19:09 UTC (permalink / raw)
  To: Marek Vasut
  Cc: herbert, davem, hpa, ilya.albrekht, maxim.locktyukhin,
	ronen.zohar, wajdi.k.feghali, tim.c.chen, linux-crypto

On Mon, 2014-03-17 at 19:11 +0100, Marek Vasut wrote:
> On Monday, March 17, 2014 at 05:53:52 PM, chandramouli narayanan wrote:
> > On Mon, 2014-03-17 at 17:06 +0100, Marek Vasut wrote:
> > > On Monday, March 17, 2014 at 04:53:12 PM, chandramouli narayanan wrote:
> > > > On Fri, 2014-03-14 at 06:40 +0100, Marek Vasut wrote:
> > > > > On Wednesday, March 12, 2014 at 07:47:50 PM, chandramouli narayanan 
> wrote:
> > > > > > This git patch adds the glue, build and configuration changes
> > > > > > to include x86_64 AVX2 optimization of SHA1 transform to
> > > > > > crypto support. The patch has been tested with 3.14.0-rc1
> > > > > > kernel.
> > > > > > 
> > > > > > Changes from the initial version of this patch are in
> > > > > > a) check for BMI2 in addition to AVX2 support since
> > > > > > __sha1_transform_avx2() uses rorx
> > > > > > b) Since the module build has dependency on 64bit, it is
> > > > > > redundant to check it in the code here.
> > > > > > 
> > > > > > On a Haswell desktop, with turbo disabled and all cpus running
> > > > > > at maximum frequency, tcrypt shows AVX2 performance improvement
> > > > > > from 3% for 256 bytes update to 16% for 1024 bytes update over
> > > > > > AVX implementation.
> > > > > > 
> > > > > > Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
> > > > > > 
> > > > > > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > > > > > index 6ba54d6..61d6e28 100644
> > > > > > --- a/arch/x86/crypto/Makefile
> > > > > > +++ b/arch/x86/crypto/Makefile
> > > > > > @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o
> > > > > > aesni-intel_glue.o fpu.o aesni-intel-$(CONFIG_64BIT) +=
> > > > > > aesni-intel_avx-x86_64.o
> > > > > > 
> > > > > >  ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o
> > > > > > 
> > > > > > ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o
> > > > > > sha1_ssse3_glue.o
> > > > > > +ifeq ($(avx2_supported),yes)
> > > > > > +sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> > > > > 
> > > > > Use:
> > > > > 
> > > > > sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o
> > > > > 
> > > > > And you will not need the CONFIG_AS_AVX2 ifdef in your previous
> > > > > patch, no ? [...]
> > > > > Best regards,
> > > > > Marek Vasut
> > > > 
> > > > Sorry for the delayed reply. Agreed, I will fix the dependency.
> > > 
> > > No problem, thanks!
> > > 
> > > Best regards,
> > > Marek Vasut
> > 
> > On second thoughts, with sha1-sse3-(CONFIG_AS_AVX2) +=
> > sha1_avx2_x86_64_asm.o, I have build issues and sha1_transform_avx2
> > undefined in sha1-sss3.ko.
> > 
> > I can rid #ifdef CONFIG_AS_AVX2 in patch1. The following works though:
> > ifeq ($(avx2_supported),yes)
> > sha1-ssse3-y += sha1_avx2_x86_64_asm.o
> > endif
> 
> Looking throughout the arch/x86/crypto/Makefile , this sha1-ssse3.o thing is a 
> bit odd I think. Why exactly does this not follow suit with the camellia or 
> serpent ciphers ? I mean, look at their build rules, they handle all of 
> SSE2/AVX/AVX2 implementation and it's build. Can we not clean up the SHA1-SSSE3 
> to do exactly the same ? But please note I might be just plain wrong and if 
> that's the case, let me know ;-)
I appended AVX2 support to the existing sha1_ssse3_glue. I will see if
it can be cleaned up. 
> 
> btw. I noticed another nit in the code. You use __sha1_transform_avx2() , but 
> there previous function using AVX1 is called sha1_transform_avx() . Drop those 
> two underscores please for consistency's sake.
> 
__sha1_transform_avx2() is merely an internal inline function. The code
patch picks sha1_transform_avx() or sha1_transform_avx2() depending on
the datablock size (based on the results from running tcrypt). 
 
> Thanks!
> 
> Best regards,
> Marek Vasut

thanks
- mouli

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-03-17 19:08 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-12 18:47 [PATCH 2/2] SHA1 transform: x86_64 AVX2 optimization - glue & build-v2 chandramouli narayanan
2014-03-14  5:40 ` Marek Vasut
2014-03-17 15:53   ` chandramouli narayanan
2014-03-17 16:06     ` Marek Vasut
2014-03-17 16:53       ` chandramouli narayanan
2014-03-17 18:11         ` Marek Vasut
2014-03-17 19:09           ` chandramouli narayanan
2014-03-17 18:37         ` H. Peter Anvin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.