From: Andrew Jones <ajones@ventanamicro.com> To: linux-riscv@lists.infradead.org, kvm-riscv@lists.infradead.org, devicetree@vger.kernel.org Cc: 'Heiko Stuebner ' <heiko@sntech.de>, 'Krzysztof Kozlowski ' <krzysztof.kozlowski+dt@linaro.org>, 'Anup Patel ' <apatel@ventanamicro.com>, 'Palmer Dabbelt ' <palmer@dabbelt.com>, 'Atish Patra ' <atishp@rivosinc.com>, 'Paul Walmsley ' <paul.walmsley@sifive.com>, 'Albert Ou ' <aou@eecs.berkeley.edu>, 'Conor Dooley ' <conor.dooley@microchip.com>, 'Rob Herring ' <robh@kernel.org>, 'Jisheng Zhang ' <jszhang@kernel.org> Subject: [PATCH v3 4/6] RISC-V: Use Zicboz in clear_page when available Date: Mon, 30 Jan 2023 13:01:26 +0100 [thread overview] Message-ID: <20230130120128.1349464-5-ajones@ventanamicro.com> (raw) In-Reply-To: <20230130120128.1349464-1-ajones@ventanamicro.com> Using memset() to zero a 4K page takes 563 total instructions where 20 are branches. clear_page() with Zicboz takes 150 total instructions where 16 are branches. We could reduce the numbers by further unrolling, but, since the cboz block size isn't fixed, we'd need a Duff device to ensure we don't execute too many unrolled steps. Also, cbo.zero doesn't take an offset, so each unrolled step requires it and an add instruction. This increases the chance for icache misses if we unroll many times. For these reasons we only unroll four times. Unrolling four times should be safe as it supports cboz block sizes up to 1K when used with 4K pages and it's only 24 to 32 bytes of unrolled instructions. Another note about the Duff device idea is that it would probably be best to store the number of steps needed at boot time and then load the value in clear_page(). Calculating it in clear_page(), particularly without the Zbb extension, would not be efficient. 
Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Acked-by: Conor Dooley <conor.dooley@microchip.com> --- arch/riscv/Kconfig | 13 +++++++++++ arch/riscv/include/asm/insn-def.h | 4 ++++ arch/riscv/include/asm/page.h | 6 +++++- arch/riscv/lib/Makefile | 1 + arch/riscv/lib/clear_page.S | 36 +++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/lib/clear_page.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 33bbdc33cef8..3759a2f6edd5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -432,6 +432,19 @@ config RISCV_ISA_ZICBOM If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOZ + bool "Zicboz extension support for faster zeroing of memory" + depends on !XIP_KERNEL && MMU + select RISCV_ALTERNATIVE + default y + help + Enable the use of the ZICBOZ extension (cbo.zero instruction) + when available. + + The Zicboz extension is used for faster zeroing of memory. + + If you don't know what to do here, say Y. 
+ config TOOLCHAIN_HAS_ZIHINTPAUSE bool default y diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index e01ab51f50d2..6960beb75f32 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -192,4 +192,8 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) +#define CBO_zero(base) \ + INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ + RS1(base), SIMM12(4)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 9f432c1b5289..ccd168fe29d2 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -49,10 +49,14 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_RISCV_ISA_ZICBOZ +void clear_page(void *page); +#else #define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) +#endif #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) -#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE) +#define clear_user_page(pgaddr, vaddr, page) clear_page(pgaddr) #define copy_user_page(vto, vfrom, vaddr, topg) \ memcpy((vto), (vfrom), PAGE_SIZE) diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 25d5c9664e57..9ee5e2ab5143 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -5,5 +5,6 @@ lib-y += memset.o lib-y += memmove.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S new file mode 100644 index 000000000000..49f29139a5b6 --- /dev/null +++ b/arch/riscv/lib/clear_page.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. 
+ */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> +#include <asm/insn-def.h> +#include <asm/page.h> + +/* void clear_page(void *page) */ +ENTRY(__clear_page) +WEAK(clear_page) + li a2, PAGE_SIZE + ALTERNATIVE("j .Lno_zicboz", "nop", + 0, RISCV_ISA_EXT_ZICBOZ, CONFIG_RISCV_ISA_ZICBOZ) + la a1, riscv_cboz_block_size + lw a1, 0(a1) + add a2, a0, a2 +.Lzero_loop: + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + bltu a0, a2, .Lzero_loop + ret +.Lno_zicboz: + li a1, 0 + tail __memset +END(__clear_page) -- 2.39.1
WARNING: multiple messages have this Message-ID (diff)
From: Andrew Jones <ajones@ventanamicro.com> To: linux-riscv@lists.infradead.org, kvm-riscv@lists.infradead.org, devicetree@vger.kernel.org Cc: 'Heiko Stuebner ' <heiko@sntech.de>, 'Krzysztof Kozlowski ' <krzysztof.kozlowski+dt@linaro.org>, 'Anup Patel ' <apatel@ventanamicro.com>, 'Palmer Dabbelt ' <palmer@dabbelt.com>, 'Atish Patra ' <atishp@rivosinc.com>, 'Paul Walmsley ' <paul.walmsley@sifive.com>, 'Albert Ou ' <aou@eecs.berkeley.edu>, 'Conor Dooley ' <conor.dooley@microchip.com>, 'Rob Herring ' <robh@kernel.org>, 'Jisheng Zhang ' <jszhang@kernel.org> Subject: [PATCH v3 4/6] RISC-V: Use Zicboz in clear_page when available Date: Mon, 30 Jan 2023 13:01:26 +0100 [thread overview] Message-ID: <20230130120128.1349464-5-ajones@ventanamicro.com> (raw) In-Reply-To: <20230130120128.1349464-1-ajones@ventanamicro.com> Using memset() to zero a 4K page takes 563 total instructions where 20 are branches. clear_page() with Zicboz takes 150 total instructions where 16 are branches. We could reduce the numbers by further unrolling, but, since the cboz block size isn't fixed, we'd need a Duff device to ensure we don't execute too many unrolled steps. Also, cbo.zero doesn't take an offset, so each unrolled step requires it and an add instruction. This increases the chance for icache misses if we unroll many times. For these reasons we only unroll four times. Unrolling four times should be safe as it supports cboz block sizes up to 1K when used with 4K pages and it's only 24 to 32 bytes of unrolled instructions. Another note about the Duff device idea is that it would probably be best to store the number of steps needed at boot time and then load the value in clear_page(). Calculating it in clear_page(), particularly without the Zbb extension, would not be efficient. 
Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Acked-by: Conor Dooley <conor.dooley@microchip.com> --- arch/riscv/Kconfig | 13 +++++++++++ arch/riscv/include/asm/insn-def.h | 4 ++++ arch/riscv/include/asm/page.h | 6 +++++- arch/riscv/lib/Makefile | 1 + arch/riscv/lib/clear_page.S | 36 +++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/lib/clear_page.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 33bbdc33cef8..3759a2f6edd5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -432,6 +432,19 @@ config RISCV_ISA_ZICBOM If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOZ + bool "Zicboz extension support for faster zeroing of memory" + depends on !XIP_KERNEL && MMU + select RISCV_ALTERNATIVE + default y + help + Enable the use of the ZICBOZ extension (cbo.zero instruction) + when available. + + The Zicboz extension is used for faster zeroing of memory. + + If you don't know what to do here, say Y. 
+ config TOOLCHAIN_HAS_ZIHINTPAUSE bool default y diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index e01ab51f50d2..6960beb75f32 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -192,4 +192,8 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) +#define CBO_zero(base) \ + INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ + RS1(base), SIMM12(4)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 9f432c1b5289..ccd168fe29d2 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -49,10 +49,14 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_RISCV_ISA_ZICBOZ +void clear_page(void *page); +#else #define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) +#endif #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) -#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE) +#define clear_user_page(pgaddr, vaddr, page) clear_page(pgaddr) #define copy_user_page(vto, vfrom, vaddr, topg) \ memcpy((vto), (vfrom), PAGE_SIZE) diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 25d5c9664e57..9ee5e2ab5143 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -5,5 +5,6 @@ lib-y += memset.o lib-y += memmove.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S new file mode 100644 index 000000000000..49f29139a5b6 --- /dev/null +++ b/arch/riscv/lib/clear_page.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. 
+ */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> +#include <asm/insn-def.h> +#include <asm/page.h> + +/* void clear_page(void *page) */ +ENTRY(__clear_page) +WEAK(clear_page) + li a2, PAGE_SIZE + ALTERNATIVE("j .Lno_zicboz", "nop", + 0, RISCV_ISA_EXT_ZICBOZ, CONFIG_RISCV_ISA_ZICBOZ) + la a1, riscv_cboz_block_size + lw a1, 0(a1) + add a2, a0, a2 +.Lzero_loop: + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + bltu a0, a2, .Lzero_loop + ret +.Lno_zicboz: + li a1, 0 + tail __memset +END(__clear_page) -- 2.39.1 _______________________________________________ linux-riscv mailing list linux-riscv@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-riscv
next prev parent reply other threads:[~2023-01-30 12:01 UTC|newest] Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-01-30 12:01 [PATCH v3 0/6] RISC-V: Apply Zicboz to clear_page Andrew Jones 2023-01-30 12:01 ` Andrew Jones 2023-01-30 12:01 ` [PATCH v3 1/6] RISC-V: Factor out body of riscv_init_cbom_blocksize loop Andrew Jones 2023-01-30 12:01 ` Andrew Jones 2023-01-30 12:01 ` [PATCH v3 2/6] dt-bindings: riscv: Document cboz-block-size Andrew Jones 2023-01-30 12:01 ` Andrew Jones 2023-01-30 12:25 ` Conor Dooley 2023-01-30 12:25 ` Conor Dooley 2023-01-30 22:57 ` Rob Herring 2023-01-30 22:57 ` Rob Herring 2023-01-30 12:01 ` [PATCH v3 3/6] RISC-V: Add Zicboz detection and block size parsing Andrew Jones 2023-01-30 12:01 ` Andrew Jones 2023-01-30 12:01 ` Andrew Jones [this message] 2023-01-30 12:01 ` [PATCH v3 4/6] RISC-V: Use Zicboz in clear_page when available Andrew Jones 2023-02-02 4:35 ` Palmer Dabbelt 2023-02-02 4:35 ` Palmer Dabbelt 2023-02-02 7:41 ` Andrew Jones 2023-02-02 7:41 ` Andrew Jones 2023-01-30 12:01 ` [PATCH v3 5/6] RISC-V: KVM: Provide UAPI for Zicboz block size Andrew Jones 2023-01-30 12:01 ` Andrew Jones 2023-01-30 12:01 ` [PATCH v3 6/6] RISC-V: KVM: Expose Zicboz to the guest Andrew Jones 2023-01-30 12:01 ` Andrew Jones 2023-01-30 18:30 ` [PATCH v3 0/6] RISC-V: Apply Zicboz to clear_page Jeff Law 2023-01-30 18:47 ` Andrew Jones 2023-01-30 18:55 ` Jeff Law 2023-01-30 18:55 ` Jeff Law
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230130120128.1349464-5-ajones@ventanamicro.com \ --to=ajones@ventanamicro.com \ --cc=aou@eecs.berkeley.edu \ --cc=apatel@ventanamicro.com \ --cc=atishp@rivosinc.com \ --cc=conor.dooley@microchip.com \ --cc=devicetree@vger.kernel.org \ --cc=heiko@sntech.de \ --cc=jszhang@kernel.org \ --cc=krzysztof.kozlowski+dt@linaro.org \ --cc=kvm-riscv@lists.infradead.org \ --cc=linux-riscv@lists.infradead.org \ --cc=palmer@dabbelt.com \ --cc=paul.walmsley@sifive.com \ --cc=robh@kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.