From: Evan Green <evan@rivosinc.com> To: Palmer Dabbelt <palmer@rivosinc.com> Cc: Anup Patel <apatel@ventanamicro.com>, Albert Ou <aou@eecs.berkeley.edu>, Heiko Stuebner <heiko@sntech.de>, Ley Foon Tan <leyfoon.tan@starfivetech.com>, Marc Zyngier <maz@kernel.org>, linux-kernel@vger.kernel.org, Conor Dooley <conor.dooley@microchip.com>, David Laight <David.Laight@aculab.com>, Palmer Dabbelt <palmer@dabbelt.com>, Evan Green <evan@rivosinc.com>, Jisheng Zhang <jszhang@kernel.org>, Paul Walmsley <paul.walmsley@sifive.com>, Greentime Hu <greentime.hu@sifive.com>, linux-riscv@lists.infradead.org, Andrew Jones <ajones@ventanamicro.com> Subject: [PATCH] RISC-V: Probe misaligned access speed in parallel Date: Fri, 15 Sep 2023 11:49:03 -0700 [thread overview] Message-ID: <20230915184904.1976183-1-evan@rivosinc.com> (raw) Probing for misaligned access speed takes about 0.06 seconds. On a system with 64 cores, doing this in smp_callin() means it's done serially, extending boot time by 3.8 seconds. That's a lot of boot time. Instead of measuring each CPU serially, let's do the measurements on all CPUs in parallel. If we disable preemption on all CPUs, the jiffies stop ticking, so we can do this in stages of 1) everybody except core 0, then 2) core 0. The measurement call in smp_callin() stays around, but is now conditionalized to only run if a new CPU shows up after the round of in-parallel measurements has run. The goal is to have the measurement call not run during boot or suspend/resume, but only on a hotplug addition. Signed-off-by: Evan Green <evan@rivosinc.com> --- Jisheng, I didn't add your Tested-by tag since the patch evolved from the one you tested. Hopefully this one brings you the same result. 
--- arch/riscv/include/asm/cpufeature.h | 3 ++- arch/riscv/kernel/cpufeature.c | 28 +++++++++++++++++++++++----- arch/riscv/kernel/smpboot.c | 11 ++++++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index d0345bd659c9..19e7817eba10 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -30,6 +30,7 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void check_unaligned_access(int cpu); +extern bool misaligned_speed_measured; +int check_unaligned_access(void *unused); #endif diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 1cfbba65d11a..8eb36e1dfb95 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -42,6 +42,9 @@ struct riscv_isainfo hart_isa[NR_CPUS]; /* Performance information */ DEFINE_PER_CPU(long, misaligned_access_speed); +/* Boot-time in-parallel unaligned access measurement has occurred. */ +bool misaligned_speed_measured; + /** * riscv_isa_extension_base() - Get base extension word * @@ -556,8 +559,9 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void check_unaligned_access(int cpu) +int check_unaligned_access(void *unused) { + int cpu = smp_processor_id(); u64 start_cycles, end_cycles; u64 word_cycles; u64 byte_cycles; @@ -571,7 +575,7 @@ void check_unaligned_access(int cpu) page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); if (!page) { pr_warn("Can't alloc pages to measure memcpy performance"); - return; + return 0; } /* Make an unaligned destination buffer. 
*/ @@ -643,15 +647,29 @@ void check_unaligned_access(int cpu) out: __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); + return 0; +} + +static void check_unaligned_access_nonboot_cpu(void *param) +{ + if (smp_processor_id() != 0) + check_unaligned_access(param); } -static int check_unaligned_access_boot_cpu(void) +static int check_unaligned_access_all_cpus(void) { - check_unaligned_access(0); + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, NULL, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, NULL, true); + + /* Boot-time measurements are complete. */ + misaligned_speed_measured = true; return 0; } -arch_initcall(check_unaligned_access_boot_cpu); +arch_initcall(check_unaligned_access_all_cpus); #ifdef CONFIG_RISCV_ALTERNATIVE /* diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 1b8da4e40a4d..39322ae20a75 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -27,6 +27,7 @@ #include <linux/sched/mm.h> #include <asm/cpu_ops.h> #include <asm/cpufeature.h> +#include <asm/hwprobe.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/numa.h> @@ -246,7 +247,15 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); - check_unaligned_access(curr_cpuid); + + /* + * Boot-time misaligned access speed measurements are done in parallel + * in an initcall. Only measure here for hotplug. + */ + if (misaligned_speed_measured && + (per_cpu(misaligned_access_speed, curr_cpuid) == RISCV_HWPROBE_MISALIGNED_UNKNOWN)) { + check_unaligned_access(NULL); + } if (has_vector()) { if (riscv_v_setup_vsize()) -- 2.34.1 _______________________________________________ linux-riscv mailing list linux-riscv@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-riscv
WARNING: multiple messages have this Message-ID (diff)
From: Evan Green <evan@rivosinc.com> To: Palmer Dabbelt <palmer@rivosinc.com> Cc: David Laight <David.Laight@aculab.com>, Jisheng Zhang <jszhang@kernel.org>, Evan Green <evan@rivosinc.com>, Albert Ou <aou@eecs.berkeley.edu>, Andrew Jones <ajones@ventanamicro.com>, Anup Patel <apatel@ventanamicro.com>, Conor Dooley <conor.dooley@microchip.com>, Greentime Hu <greentime.hu@sifive.com>, Heiko Stuebner <heiko@sntech.de>, Ley Foon Tan <leyfoon.tan@starfivetech.com>, Marc Zyngier <maz@kernel.org>, Palmer Dabbelt <palmer@dabbelt.com>, Paul Walmsley <paul.walmsley@sifive.com>, Sunil V L <sunilvl@ventanamicro.com>, linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org Subject: [PATCH] RISC-V: Probe misaligned access speed in parallel Date: Fri, 15 Sep 2023 11:49:03 -0700 [thread overview] Message-ID: <20230915184904.1976183-1-evan@rivosinc.com> (raw) Probing for misaligned access speed takes about 0.06 seconds. On a system with 64 cores, doing this in smp_callin() means it's done serially, extending boot time by 3.8 seconds. That's a lot of boot time. Instead of measuring each CPU serially, let's do the measurements on all CPUs in parallel. If we disable preemption on all CPUs, the jiffies stop ticking, so we can do this in stages of 1) everybody except core 0, then 2) core 0. The measurement call in smp_callin() stays around, but is now conditionalized to only run if a new CPU shows up after the round of in-parallel measurements has run. The goal is to have the measurement call not run during boot or suspend/resume, but only on a hotplug addition. Signed-off-by: Evan Green <evan@rivosinc.com> --- Jisheng, I didn't add your Tested-by tag since the patch evolved from the one you tested. Hopefully this one brings you the same result. 
--- arch/riscv/include/asm/cpufeature.h | 3 ++- arch/riscv/kernel/cpufeature.c | 28 +++++++++++++++++++++++----- arch/riscv/kernel/smpboot.c | 11 ++++++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index d0345bd659c9..19e7817eba10 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -30,6 +30,7 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void check_unaligned_access(int cpu); +extern bool misaligned_speed_measured; +int check_unaligned_access(void *unused); #endif diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 1cfbba65d11a..8eb36e1dfb95 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -42,6 +42,9 @@ struct riscv_isainfo hart_isa[NR_CPUS]; /* Performance information */ DEFINE_PER_CPU(long, misaligned_access_speed); +/* Boot-time in-parallel unaligned access measurement has occurred. */ +bool misaligned_speed_measured; + /** * riscv_isa_extension_base() - Get base extension word * @@ -556,8 +559,9 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void check_unaligned_access(int cpu) +int check_unaligned_access(void *unused) { + int cpu = smp_processor_id(); u64 start_cycles, end_cycles; u64 word_cycles; u64 byte_cycles; @@ -571,7 +575,7 @@ void check_unaligned_access(int cpu) page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); if (!page) { pr_warn("Can't alloc pages to measure memcpy performance"); - return; + return 0; } /* Make an unaligned destination buffer. 
*/ @@ -643,15 +647,29 @@ void check_unaligned_access(int cpu) out: __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); + return 0; +} + +static void check_unaligned_access_nonboot_cpu(void *param) +{ + if (smp_processor_id() != 0) + check_unaligned_access(param); } -static int check_unaligned_access_boot_cpu(void) +static int check_unaligned_access_all_cpus(void) { - check_unaligned_access(0); + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, NULL, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, NULL, true); + + /* Boot-time measurements are complete. */ + misaligned_speed_measured = true; return 0; } -arch_initcall(check_unaligned_access_boot_cpu); +arch_initcall(check_unaligned_access_all_cpus); #ifdef CONFIG_RISCV_ALTERNATIVE /* diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 1b8da4e40a4d..39322ae20a75 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -27,6 +27,7 @@ #include <linux/sched/mm.h> #include <asm/cpu_ops.h> #include <asm/cpufeature.h> +#include <asm/hwprobe.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/numa.h> @@ -246,7 +247,15 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); - check_unaligned_access(curr_cpuid); + + /* + * Boot-time misaligned access speed measurements are done in parallel + * in an initcall. Only measure here for hotplug. + */ + if (misaligned_speed_measured && + (per_cpu(misaligned_access_speed, curr_cpuid) == RISCV_HWPROBE_MISALIGNED_UNKNOWN)) { + check_unaligned_access(NULL); + } if (has_vector()) { if (riscv_v_setup_vsize()) -- 2.34.1
next reply other threads:[~2023-09-15 18:49 UTC|newest] Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-09-15 18:49 Evan Green [this message] 2023-09-15 18:49 ` [PATCH] RISC-V: Probe misaligned access speed in parallel Evan Green 2023-09-16 0:16 ` Conor Dooley 2023-09-16 0:16 ` Conor Dooley 2023-09-16 6:45 ` Andrew Jones 2023-09-16 6:45 ` Andrew Jones 2023-09-16 8:39 ` Jisheng Zhang 2023-09-16 8:39 ` Jisheng Zhang 2023-11-01 11:31 ` Jisheng Zhang 2023-11-01 11:31 ` Jisheng Zhang 2023-11-01 17:28 ` Evan Green 2023-11-01 17:28 ` Evan Green 2023-11-02 17:07 ` Jisheng Zhang 2023-11-02 17:07 ` Jisheng Zhang 2023-11-02 22:41 ` Evan Green 2023-11-02 22:41 ` Evan Green 2023-11-03 8:34 ` Conor Dooley 2023-11-03 8:34 ` Conor Dooley
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230915184904.1976183-1-evan@rivosinc.com \ --to=evan@rivosinc.com \ --cc=David.Laight@aculab.com \ --cc=ajones@ventanamicro.com \ --cc=aou@eecs.berkeley.edu \ --cc=apatel@ventanamicro.com \ --cc=conor.dooley@microchip.com \ --cc=greentime.hu@sifive.com \ --cc=heiko@sntech.de \ --cc=jszhang@kernel.org \ --cc=leyfoon.tan@starfivetech.com \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-riscv@lists.infradead.org \ --cc=maz@kernel.org \ --cc=palmer@dabbelt.com \ --cc=palmer@rivosinc.com \ --cc=paul.walmsley@sifive.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.