All of lore.kernel.org
 help / color / mirror / Atom feed
* [v2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
@ 2017-04-06  5:38 Matt Brown
  2017-04-06  6:13 ` Daniel Axtens
  0 siblings, 1 reply; 3+ messages in thread
From: Matt Brown @ 2017-04-06  5:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: dja, linux-raid

The raid6 Q syndrome check has been optimised using the vpermxor
instruction.  This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:

	raid6: altivecx4 gen() 18773 MB/s
	raid6: altivecx8 gen() 19438 MB/s

	raid6: vpermxor4 gen() 25112 MB/s
	raid6: vpermxor8 gen() 26279 MB/s

Bugs fixed:
	- A small bug in pq.h regarding a missing and mismatched
	  ifdef statement
	- Fixed test/Makefile to correctly build test on ppc

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
mpe I assume you are ok to take this patch, most of the other ppc raid patches have gone through you.

Changelog:
v2
	- added reference to raid6 paper
	- shortened asm lines
	- removed redundant Makefile line
	- fixed test/Makefile bug
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile      |  27 ++++++++++++-
 lib/raid6/algos.c       |   4 ++
 lib/raid6/altivec.uc    |   3 ++
 lib/raid6/test/Makefile |  26 +++++++++---
 lib/raid6/vpermxor.uc   | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 161 insertions(+), 7 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+				vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec2,
 	&raid6_altivec4,
 	&raid6_altivec8,
+	&raid6_vpermxor1,
+	&raid6_vpermxor2,
+	&raid6_vpermxor4,
+	&raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
 	&raid6_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include <linux/raid/pq.h>
 
+#ifdef CONFIG_ALTIVEC
+
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..b9503a3 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -43,11 +43,13 @@ else ifeq ($(HAS_NEON),yes)
         OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
-        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
-                         gcc -c -x c - >&/dev/null && \
-                         rm ./-.o && echo yes)
-        ifeq ($(HAS_ALTIVEC),yes)
-                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+	 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
+			 gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
+	 ifeq ($(HAS_ALTIVEC),yes)
+		CFLAGS += -I../../../arch/powerpc/include
+		CFLAGS += -DCONFIG_ALTIVEC
+		OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+			vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
         endif
 endif
 ifeq ($(ARCH),tilegx)
@@ -97,6 +99,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
 
@@ -122,7 +136,7 @@ tables.c: mktables
 	./mktables > tables.c
 
 clean:
-	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
 	rm -f tilegx*.c
 
 spotless: clean
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000..cb7bed3
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor opcode to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor opcode.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+					    0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+					    0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+					     0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+					     0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+							void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+	unative_t wp$$, wq$$, wd$$;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for (d = 0; d < bytes; d += NSIZE*$#) {
+		wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+		for (z = z0-1; z>=0; z--) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			/* P syndrome */
+			wp$$ = vec_xor(wp$$, wd$$);
+
+			/*Q syndrome */
+			asm("vpermxor %0,%1,%2,%3":"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+			wq$$ = vec_xor(wq$$, wd$$);
+		}
+		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
+	}
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	preempt_disable();
+	enable_kernel_altivec();
+
+	raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+	disable_kernel_altivec();
+	preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+	/* Check if CPU has both altivec and the vpermxor instruction*/
+# ifdef __KERNEL__
+	return (cpu_has_feature(CONFIG_ALTIVEC) &&
+		cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+	return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+	raid6_vpermxor$#_gen_syndrome,
+	NULL,
+	raid6_have_altivec_vpermxor,
+	"vpermxor$#",
+	0
+};
+#endif
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [v2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
  2017-04-06  5:38 [v2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome Matt Brown
@ 2017-04-06  6:13 ` Daniel Axtens
  2017-04-10  6:54   ` Michael Ellerman
  0 siblings, 1 reply; 3+ messages in thread
From: Daniel Axtens @ 2017-04-06  6:13 UTC (permalink / raw)
  To: Matt Brown, linuxppc-dev; +Cc: linux-raid

Hi Matt,

Thanks for answering my questions and doing those fixes.


> Bugs fixed:
> 	- A small bug in pq.h regarding a missing and mismatched
> 	  ifdef statement
> 	- Fixed test/Makefile to correctly build test on ppc
>

I think this commit should be labelled:
Fixes: 4f8c55c5ad49 ("lib/raid6: build proper files on corresponding arch")

mpe can probably add that when he merges - no need to do a new version :)

>  else
> -        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
> -                         gcc -c -x c - >&/dev/null && \
> -                         rm ./-.o && echo yes)
> -        ifeq ($(HAS_ALTIVEC),yes)
> -                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
> +	 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
> +			 gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
> +	 ifeq ($(HAS_ALTIVEC),yes)
> +		CFLAGS += -I../../../arch/powerpc/include
> +		CFLAGS += -DCONFIG_ALTIVEC
> +		OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
> +			vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
>          endif
>  endif
Looks like vim has replaced spaces with tabs here. Not sure how much we
care...

>  ifeq ($(ARCH),tilegx)
> @@ -97,6 +99,18 @@ altivec4.c: altivec.uc ../unroll.awk
>  altivec8.c: altivec.uc ../unroll.awk
>  	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
>
... especially seeing as tabs are already used in the file here!

> +vpermxor1.c: vpermxor.uc ../unroll.awk
> +	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
> +
> +vpermxor2.c: vpermxor.uc ../unroll.awk
> +	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
> +
> +vpermxor4.c: vpermxor.uc ../unroll.awk
> +	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
> +
> +vpermxor8.c: vpermxor.uc ../unroll.awk
> +	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
> +
>  int1.c: int.uc ../unroll.awk
>  	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
>  
> +			/*Q syndrome */
> +	/* Check if CPU has both altivec and the vpermxor instruction*/
Very trivial nit: for future patches please make sure you have spaces
between the beginning/end of a comment and the comment.

> +# ifdef __KERNEL__
> +	return (cpu_has_feature(CONFIG_ALTIVEC) &&
> +		cpu_has_feature(CPU_FTR_ARCH_207S));
I think CPU_FTR_ARCH_207S implies Altivec? Again, not a real problem,
I don't think it's really worth doing a respin for any of these, so:

Reviewed-by: Daniel Axtens <dja@axtens.net>

Thanks for making Power faster!

Regards,
Daniel


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [v2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
  2017-04-06  6:13 ` Daniel Axtens
@ 2017-04-10  6:54   ` Michael Ellerman
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Ellerman @ 2017-04-10  6:54 UTC (permalink / raw)
  To: Daniel Axtens, Matt Brown, linuxppc-dev; +Cc: linux-raid

Daniel Axtens <dja@axtens.net> writes:

> Hi Matt,
>
> Thanks for answering my questions and doing those fixes.
>
>
>> Bugs fixed:
>> 	- A small bug in pq.h regarding a missing and mismatched
>> 	  ifdef statement
>> 	- Fixed test/Makefile to correctly build test on ppc
>>
>
> I think this commit should be labelled:
> Fixes: 4f8c55c5ad49 ("lib/raid6: build proper files on corresponding arch")
>
> mpe can probably add that when he merges - no need to do a new version :)

Please send a separate patch which does that fix.

>>  else
>> -        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
>> -                         gcc -c -x c - >&/dev/null && \
>> -                         rm ./-.o && echo yes)
>> -        ifeq ($(HAS_ALTIVEC),yes)
>> -                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
>> +	 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
>> +			 gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
>> +	 ifeq ($(HAS_ALTIVEC),yes)
>> +		CFLAGS += -I../../../arch/powerpc/include
>> +		CFLAGS += -DCONFIG_ALTIVEC
>> +		OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
>> +			vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
>>          endif
>>  endif
> Looks like vim has replaced spaces with tabs here. Not sure how much we
> care...

We care at least because it makes the diff look bigger than it really
is, if I'm reading it right the first three lines haven't actually
changed.

>> @@ -97,6 +99,18 @@ altivec4.c: altivec.uc ../unroll.awk
>>  altivec8.c: altivec.uc ../unroll.awk
>>  	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
>>
> ... especially seeing as tabs are already used in the file here!

It's a Makefile! Tabs have meaning :)

>> +# ifdef __KERNEL__
>> +	return (cpu_has_feature(CONFIG_ALTIVEC) &&
>> +		cpu_has_feature(CPU_FTR_ARCH_207S));
> I think CPU_FTR_ARCH_207S implies Altivec? Again, not a real problem,

It doesn't.

And also CONFIG_ALTIVEC is not a cpu feature!

You should be using CPU_FTR_ALTIVEC_COMP. That copes with the case where
the kernel is compiled without ALTIVEC support.

cheers

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-04-10  6:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-06  5:38 [v2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome Matt Brown
2017-04-06  6:13 ` Daniel Axtens
2017-04-10  6:54   ` Michael Ellerman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.