From: Greentime Hu <greentime.hu@sifive.com> To: greentime.hu@sifive.com, linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org, aou@eecs.berkeley.edu, palmer@dabbelt.com, paul.walmsley@sifive.com Cc: Han-Kuan Chen <hankuan.chen@sifive.com> Subject: [RFC PATCH v7 16/21] riscv: Add vector extension XOR implementation Date: Thu, 10 Sep 2020 16:12:11 +0800 [thread overview] Message-ID: <48fa9c9a6f13d00c67042657326d56a46ec0197f.1599719352.git.greentime.hu@sifive.com> (raw) In-Reply-To: <cover.1599719352.git.greentime.hu@sifive.com> This patch adds support for vector optimized XOR it is tested in spike and qemu. Logs in spike: [ 0.008365] xor: measuring software checksum speed [ 0.048885] 8regs : 1719.000 MB/sec [ 0.089080] 32regs : 1717.000 MB/sec [ 0.129275] rvv : 7043.000 MB/sec [ 0.129525] xor: using function: rvv (7043.000 MB/sec) Logs in qemu: [ 0.098943] xor: measuring software checksum speed [ 0.139391] 8regs : 2911.000 MB/sec [ 0.181079] 32regs : 2813.000 MB/sec [ 0.224260] rvv : 45.000 MB/sec [ 0.225586] xor: using function: 8regs (2911.000 MB/sec) Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com> Signed-off-by: Greentime Hu <greentime.hu@sifive.com> --- arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++ arch/riscv/lib/Makefile | 1 + arch/riscv/lib/xor.S | 81 ++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 arch/riscv/include/asm/xor.h create mode 100644 arch/riscv/lib/xor.S diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h new file mode 100644 index 000000000000..60ee0224913d --- /dev/null +++ b/arch/riscv/include/asm/xor.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 SiFive + */ + +#include <linux/hardirq.h> +#include <asm-generic/xor.h> +#ifdef CONFIG_VECTOR +#include <asm/vector.h> + +extern void xor_regs_2_(unsigned long bytes, unsigned long *p1, + unsigned long *p2); +extern void xor_regs_3_(unsigned long bytes, 
unsigned long *p1, + unsigned long *p2, unsigned long *p3); +extern void xor_regs_4_(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4); +extern void xor_regs_5_(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, unsigned long *p4, + unsigned long *p5); + +static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + kernel_rvv_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_rvv_end(); +} + +static void +xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + kernel_rvv_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_rvv_end(); +} + +static void +xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + kernel_rvv_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_rvv_end(); +} + +static void +xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + kernel_rvv_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_rvv_end(); +} + +static struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_rvv_2, + .do_3 = xor_rvv_3, + .do_4 = xor_rvv_4, + .do_5 = xor_rvv_5 +}; + +extern bool has_vector; +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (has_vector) { \ + xor_speed(&xor_block_rvv);\ + } \ + } while (0) +#endif diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 0d0db80800c4..cedf8d573dc3 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -4,3 +4,4 @@ lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o lib-$(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_VECTOR) += xor.o diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 000000000000..de2e234c39ed --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* 
SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 SiFive + */ +#include <linux/linkage.h> +#include <asm-generic/export.h> +#include <asm/asm.h> + +ENTRY(xor_regs_2_) + vsetvli a3, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +ENTRY(xor_regs_3_) + vsetvli a4, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +ENTRY(xor_regs_4_) + vsetvli a5, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +ENTRY(xor_regs_5_) + vsetvli a6, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + ret +END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_) -- 2.28.0
WARNING: multiple messages have this Message-ID (diff)
From: Greentime Hu <greentime.hu@sifive.com> To: greentime.hu@sifive.com, linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org, aou@eecs.berkeley.edu, palmer@dabbelt.com, paul.walmsley@sifive.com Cc: Han-Kuan Chen <hankuan.chen@sifive.com> Subject: [RFC PATCH v7 16/21] riscv: Add vector extension XOR implementation Date: Thu, 10 Sep 2020 16:12:11 +0800 [thread overview] Message-ID: <48fa9c9a6f13d00c67042657326d56a46ec0197f.1599719352.git.greentime.hu@sifive.com> (raw) In-Reply-To: <cover.1599719352.git.greentime.hu@sifive.com> This patch adds support for vector optimized XOR it is tested in spike and qemu. Logs in spike: [ 0.008365] xor: measuring software checksum speed [ 0.048885] 8regs : 1719.000 MB/sec [ 0.089080] 32regs : 1717.000 MB/sec [ 0.129275] rvv : 7043.000 MB/sec [ 0.129525] xor: using function: rvv (7043.000 MB/sec) Logs in qemu: [ 0.098943] xor: measuring software checksum speed [ 0.139391] 8regs : 2911.000 MB/sec [ 0.181079] 32regs : 2813.000 MB/sec [ 0.224260] rvv : 45.000 MB/sec [ 0.225586] xor: using function: 8regs (2911.000 MB/sec) Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com> Signed-off-by: Greentime Hu <greentime.hu@sifive.com> --- arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++ arch/riscv/lib/Makefile | 1 + arch/riscv/lib/xor.S | 81 ++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 arch/riscv/include/asm/xor.h create mode 100644 arch/riscv/lib/xor.S diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h new file mode 100644 index 000000000000..60ee0224913d --- /dev/null +++ b/arch/riscv/include/asm/xor.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 SiFive + */ + +#include <linux/hardirq.h> +#include <asm-generic/xor.h> +#ifdef CONFIG_VECTOR +#include <asm/vector.h> + +extern void xor_regs_2_(unsigned long bytes, unsigned long *p1, + unsigned long *p2); +extern void xor_regs_3_(unsigned long bytes, 
unsigned long *p1, + unsigned long *p2, unsigned long *p3); +extern void xor_regs_4_(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4); +extern void xor_regs_5_(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, unsigned long *p4, + unsigned long *p5); + +static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + kernel_rvv_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_rvv_end(); +} + +static void +xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + kernel_rvv_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_rvv_end(); +} + +static void +xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + kernel_rvv_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_rvv_end(); +} + +static void +xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + kernel_rvv_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_rvv_end(); +} + +static struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_rvv_2, + .do_3 = xor_rvv_3, + .do_4 = xor_rvv_4, + .do_5 = xor_rvv_5 +}; + +extern bool has_vector; +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (has_vector) { \ + xor_speed(&xor_block_rvv);\ + } \ + } while (0) +#endif diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 0d0db80800c4..cedf8d573dc3 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -4,3 +4,4 @@ lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o lib-$(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_VECTOR) += xor.o diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 000000000000..de2e234c39ed --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* 
SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 SiFive + */ +#include <linux/linkage.h> +#include <asm-generic/export.h> +#include <asm/asm.h> + +ENTRY(xor_regs_2_) + vsetvli a3, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +ENTRY(xor_regs_3_) + vsetvli a4, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +ENTRY(xor_regs_4_) + vsetvli a5, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +ENTRY(xor_regs_5_) + vsetvli a6, a0, e8, m8 + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + ret +END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_) -- 2.28.0 _______________________________________________ linux-riscv mailing list linux-riscv@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-riscv
next prev parent reply other threads:[~2020-09-10 8:23 UTC|newest] Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-09-10 8:11 [RFC PATCH v7 00/21] riscv: Add vector ISA support Greentime Hu 2020-09-10 8:11 ` Greentime Hu 2020-09-10 8:11 ` [RFC PATCH v7 01/21] riscv: Separate patch for cflags and aflags Greentime Hu 2020-09-10 8:11 ` Greentime Hu 2020-09-10 8:11 ` [RFC PATCH v7 02/21] riscv: Rename __switch_to_aux -> fpu Greentime Hu 2020-09-10 8:11 ` Greentime Hu 2020-09-10 8:11 ` [RFC PATCH v7 03/21] riscv: Extending cpufeature.c to detect V-extension Greentime Hu 2020-09-10 8:11 ` Greentime Hu 2020-09-10 8:11 ` [RFC PATCH v7 04/21] riscv: Add new csr defines related to vector extension Greentime Hu 2020-09-10 8:11 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 05/21] riscv: Add vector feature to compile Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 06/21] riscv: Add has_vector/riscv_vsize to save vector features Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 07/21] riscv: Reset vector register Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 08/21] riscv: Add vector struct and assembler definitions Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 09/21] riscv: Add task switch support for vector Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 15:48 ` kernel test robot 2020-09-10 8:12 ` [RFC PATCH v7 10/21] " Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 11/21] riscv: Add ptrace vector support Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 12/21] riscv: Add sigcontext save/restore for vector Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 13/21] riscv: signal: Report signal frame size to userspace via auxv Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 18:35 ` kernel test robot 2020-09-10 8:12 ` [RFC PATCH v7 14/21] riscv: 
Add support for kernel mode vector Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 20:53 ` kernel test robot 2020-09-10 8:12 ` [RFC PATCH v7 15/21] riscv: Use CSR_STATUS to replace sstatus in vector.S Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` Greentime Hu [this message] 2020-09-10 8:12 ` [RFC PATCH v7 16/21] riscv: Add vector extension XOR implementation Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 17/21] riscv: Initialize vector registers with proper vsetvli then it can work normally Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 18/21] riscv: Optimize vector registers initialization Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 23:33 ` kernel test robot 2020-09-10 8:12 ` [RFC PATCH v7 19/21] riscv: Fix an illegal instruction exception when accessing vlenb without enable vector first Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 20/21] riscv: Allocate space for vector registers in start_thread() Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-10 8:12 ` [RFC PATCH v7 21/21] riscv: Optimize task switch codes of vector Greentime Hu 2020-09-10 8:12 ` Greentime Hu 2020-09-11 2:13 ` kernel test robot
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=48fa9c9a6f13d00c67042657326d56a46ec0197f.1599719352.git.greentime.hu@sifive.com \ --to=greentime.hu@sifive.com \ --cc=aou@eecs.berkeley.edu \ --cc=hankuan.chen@sifive.com \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-riscv@lists.infradead.org \ --cc=palmer@dabbelt.com \ --cc=paul.walmsley@sifive.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.