All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greentime Hu <greentime.hu@sifive.com>
To: greentime.hu@sifive.com, linux-riscv@lists.infradead.org,
	linux-kernel@vger.kernel.org, aou@eecs.berkeley.edu,
	palmer@dabbelt.com, paul.walmsley@sifive.com
Cc: Han-Kuan Chen <hankuan.chen@sifive.com>
Subject: [RFC PATCH v7 16/21] riscv: Add vector extension XOR implementation
Date: Thu, 10 Sep 2020 16:12:11 +0800	[thread overview]
Message-ID: <48fa9c9a6f13d00c67042657326d56a46ec0197f.1599719352.git.greentime.hu@sifive.com> (raw)
In-Reply-To: <cover.1599719352.git.greentime.hu@sifive.com>

This patch adds support for vector optimized XOR it is tested in spike and
qemu.

Logs in spike:
[    0.008365] xor: measuring software checksum speed
[    0.048885]    8regs     :  1719.000 MB/sec
[    0.089080]    32regs    :  1717.000 MB/sec
[    0.129275]    rvv       :  7043.000 MB/sec
[    0.129525] xor: using function: rvv (7043.000 MB/sec)

Logs in qemu:
[    0.098943] xor: measuring software checksum speed
[    0.139391]    8regs     :  2911.000 MB/sec
[    0.181079]    32regs    :  2813.000 MB/sec
[    0.224260]    rvv       :    45.000 MB/sec
[    0.225586] xor: using function: 8regs (2911.000 MB/sec)

Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
---
 arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
 arch/riscv/lib/Makefile      |  1 +
 arch/riscv/lib/xor.S         | 81 ++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 arch/riscv/include/asm/xor.h
 create mode 100644 arch/riscv/lib/xor.S

diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..60ee0224913d
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_VECTOR
+#include <asm/vector.h>
+
+extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2);
+extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3);
+extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3,
+			unsigned long *p4);
+extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3, unsigned long *p4,
+			unsigned long *p5);
+
+static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	kernel_rvv_begin();
+	xor_regs_2_(bytes, p1, p2);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+	kernel_rvv_begin();
+	xor_regs_3_(bytes, p1, p2, p3);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+	kernel_rvv_begin();
+	xor_regs_4_(bytes, p1, p2, p3, p4);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	kernel_rvv_begin();
+	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+	kernel_rvv_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+	.name = "rvv",
+	.do_2 = xor_rvv_2,
+	.do_3 = xor_rvv_3,
+	.do_4 = xor_rvv_4,
+	.do_5 = xor_rvv_5
+};
+
+extern bool has_vector;
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+	do {        \
+		xor_speed(&xor_block_8regs);    \
+		xor_speed(&xor_block_32regs);    \
+		if (has_vector) { \
+			xor_speed(&xor_block_rvv);\
+		} \
+	} while (0)
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0d0db80800c4..cedf8d573dc3 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -4,3 +4,4 @@ lib-y			+= memcpy.o
 lib-y			+= memset.o
 lib-y			+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
+lib-$(CONFIG_VECTOR)	+= xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..de2e234c39ed
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_)
+	vsetvli a3, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a3
+	vxor.vv v16, v0, v8
+	add a2, a2, a3
+	vse8.v v16, (a1)
+	add a1, a1, a3
+	bnez a0, xor_regs_2_
+	ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_)
+	vsetvli a4, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a4
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a4
+	vxor.vv v16, v0, v16
+	add a3, a3, a4
+	vse8.v v16, (a1)
+	add a1, a1, a4
+	bnez a0, xor_regs_3_
+	ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_)
+	vsetvli a5, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a5
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a5
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a5
+	vxor.vv v16, v0, v24
+	add a4, a4, a5
+	vse8.v v16, (a1)
+	add a1, a1, a5
+	bnez a0, xor_regs_4_
+	ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_)
+	vsetvli a6, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a6
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a6
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a6
+	vxor.vv v0, v0, v24
+	vle8.v v8, (a5)
+	add a4, a4, a6
+	vxor.vv v16, v0, v8
+	add a5, a5, a6
+	vse8.v v16, (a1)
+	add a1, a1, a6
+	bnez a0, xor_regs_5_
+	ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
-- 
2.28.0


WARNING: multiple messages have this Message-ID (diff)
From: Greentime Hu <greentime.hu@sifive.com>
To: greentime.hu@sifive.com, linux-riscv@lists.infradead.org,
	linux-kernel@vger.kernel.org, aou@eecs.berkeley.edu,
	palmer@dabbelt.com, paul.walmsley@sifive.com
Cc: Han-Kuan Chen <hankuan.chen@sifive.com>
Subject: [RFC PATCH v7 16/21] riscv: Add vector extension XOR implementation
Date: Thu, 10 Sep 2020 16:12:11 +0800	[thread overview]
Message-ID: <48fa9c9a6f13d00c67042657326d56a46ec0197f.1599719352.git.greentime.hu@sifive.com> (raw)
In-Reply-To: <cover.1599719352.git.greentime.hu@sifive.com>

This patch adds support for vector optimized XOR it is tested in spike and
qemu.

Logs in spike:
[    0.008365] xor: measuring software checksum speed
[    0.048885]    8regs     :  1719.000 MB/sec
[    0.089080]    32regs    :  1717.000 MB/sec
[    0.129275]    rvv       :  7043.000 MB/sec
[    0.129525] xor: using function: rvv (7043.000 MB/sec)

Logs in qemu:
[    0.098943] xor: measuring software checksum speed
[    0.139391]    8regs     :  2911.000 MB/sec
[    0.181079]    32regs    :  2813.000 MB/sec
[    0.224260]    rvv       :    45.000 MB/sec
[    0.225586] xor: using function: 8regs (2911.000 MB/sec)

Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
---
 arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
 arch/riscv/lib/Makefile      |  1 +
 arch/riscv/lib/xor.S         | 81 ++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 arch/riscv/include/asm/xor.h
 create mode 100644 arch/riscv/lib/xor.S

diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..60ee0224913d
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_VECTOR
+#include <asm/vector.h>
+
+extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2);
+extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3);
+extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3,
+			unsigned long *p4);
+extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3, unsigned long *p4,
+			unsigned long *p5);
+
+static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	kernel_rvv_begin();
+	xor_regs_2_(bytes, p1, p2);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+	kernel_rvv_begin();
+	xor_regs_3_(bytes, p1, p2, p3);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+	kernel_rvv_begin();
+	xor_regs_4_(bytes, p1, p2, p3, p4);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	kernel_rvv_begin();
+	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+	kernel_rvv_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+	.name = "rvv",
+	.do_2 = xor_rvv_2,
+	.do_3 = xor_rvv_3,
+	.do_4 = xor_rvv_4,
+	.do_5 = xor_rvv_5
+};
+
+extern bool has_vector;
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+	do {        \
+		xor_speed(&xor_block_8regs);    \
+		xor_speed(&xor_block_32regs);    \
+		if (has_vector) { \
+			xor_speed(&xor_block_rvv);\
+		} \
+	} while (0)
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0d0db80800c4..cedf8d573dc3 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -4,3 +4,4 @@ lib-y			+= memcpy.o
 lib-y			+= memset.o
 lib-y			+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
+lib-$(CONFIG_VECTOR)	+= xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..de2e234c39ed
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_)
+	vsetvli a3, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a3
+	vxor.vv v16, v0, v8
+	add a2, a2, a3
+	vse8.v v16, (a1)
+	add a1, a1, a3
+	bnez a0, xor_regs_2_
+	ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_)
+	vsetvli a4, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a4
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a4
+	vxor.vv v16, v0, v16
+	add a3, a3, a4
+	vse8.v v16, (a1)
+	add a1, a1, a4
+	bnez a0, xor_regs_3_
+	ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_)
+	vsetvli a5, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a5
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a5
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a5
+	vxor.vv v16, v0, v24
+	add a4, a4, a5
+	vse8.v v16, (a1)
+	add a1, a1, a5
+	bnez a0, xor_regs_4_
+	ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_)
+	vsetvli a6, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a6
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a6
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a6
+	vxor.vv v0, v0, v24
+	vle8.v v8, (a5)
+	add a4, a4, a6
+	vxor.vv v16, v0, v8
+	add a5, a5, a6
+	vse8.v v16, (a1)
+	add a1, a1, a6
+	bnez a0, xor_regs_5_
+	ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
-- 
2.28.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2020-09-10  8:23 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-10  8:11 [RFC PATCH v7 00/21] riscv: Add vector ISA support Greentime Hu
2020-09-10  8:11 ` Greentime Hu
2020-09-10  8:11 ` [RFC PATCH v7 01/21] riscv: Separate patch for cflags and aflags Greentime Hu
2020-09-10  8:11   ` Greentime Hu
2020-09-10  8:11 ` [RFC PATCH v7 02/21] riscv: Rename __switch_to_aux -> fpu Greentime Hu
2020-09-10  8:11   ` Greentime Hu
2020-09-10  8:11 ` [RFC PATCH v7 03/21] riscv: Extending cpufeature.c to detect V-extension Greentime Hu
2020-09-10  8:11   ` Greentime Hu
2020-09-10  8:11 ` [RFC PATCH v7 04/21] riscv: Add new csr defines related to vector extension Greentime Hu
2020-09-10  8:11   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 05/21] riscv: Add vector feature to compile Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 06/21] riscv: Add has_vector/riscv_vsize to save vector features Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 07/21] riscv: Reset vector register Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 08/21] riscv: Add vector struct and assembler definitions Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 09/21] riscv: Add task switch support for vector Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10 15:48   ` kernel test robot
2020-09-10  8:12 ` [RFC PATCH v7 10/21] " Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 11/21] riscv: Add ptrace vector support Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 12/21] riscv: Add sigcontext save/restore for vector Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 13/21] riscv: signal: Report signal frame size to userspace via auxv Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10 18:35   ` kernel test robot
2020-09-10  8:12 ` [RFC PATCH v7 14/21] riscv: Add support for kernel mode vector Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10 20:53   ` kernel test robot
2020-09-10  8:12 ` [RFC PATCH v7 15/21] riscv: Use CSR_STATUS to replace sstatus in vector.S Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` Greentime Hu [this message]
2020-09-10  8:12   ` [RFC PATCH v7 16/21] riscv: Add vector extension XOR implementation Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 17/21] riscv: Initialize vector registers with proper vsetvli then it can work normally Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 18/21] riscv: Optimize vector registers initialization Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10 23:33   ` kernel test robot
2020-09-10  8:12 ` [RFC PATCH v7 19/21] riscv: Fix an illegal instruction exception when accessing vlenb without enable vector first Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 20/21] riscv: Allocate space for vector registers in start_thread() Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-10  8:12 ` [RFC PATCH v7 21/21] riscv: Optimize task switch codes of vector Greentime Hu
2020-09-10  8:12   ` Greentime Hu
2020-09-11  2:13   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48fa9c9a6f13d00c67042657326d56a46ec0197f.1599719352.git.greentime.hu@sifive.com \
    --to=greentime.hu@sifive.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=hankuan.chen@sifive.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.