linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/2] riscv: Use CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to set misaligned access speed
@ 2024-02-01  6:40 Charlie Jenkins
  2024-02-01  6:40 ` [PATCH 1/2] riscv: lib: Introduce has_fast_misaligned_access function Charlie Jenkins
  2024-02-01  6:40 ` [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS Charlie Jenkins
  0 siblings, 2 replies; 9+ messages in thread
From: Charlie Jenkins @ 2024-02-01  6:40 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Jisheng Zhang, Evan Green
  Cc: linux-riscv, linux-kernel, Charlie Jenkins

If CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, no time needs to
be spent in the misaligned access speed probe. Disable the probe in this
case and report "fast" misaligned accesses everywhere the speed is
consumed. On riscv, this config is selected when
RISCV_EFFICIENT_UNALIGNED_ACCESS is selected, which depends on
NONPORTABLE.
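
To picture the intended end state, here is a rough sketch of the
compile-time effect (illustrative only, not part of the patches;
IS_ENABLED() stands in for the #ifdefs the patches actually use):

#include <linux/jump_label.h>
#include <linux/kconfig.h>

DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);

/*
 * Callers keep a single helper: when the config is enabled it is
 * constant-true, the probe result is never consulted, and the slow
 * paths compile away.
 */
static __always_inline bool has_fast_misaligned_accesses(void)
{
	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		return true;
	return static_branch_likely(&fast_misaligned_access_speed_key);
}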

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
Charlie Jenkins (2):
      riscv: lib: Introduce has_fast_misaligned_access function
      riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS

 arch/riscv/include/asm/cpufeature.h  | 13 +++++++++++++
 arch/riscv/kernel/cpufeature.c       |  4 ++++
 arch/riscv/kernel/sys_hwprobe.c      |  4 ++++
 arch/riscv/kernel/traps_misaligned.c |  4 ++++
 arch/riscv/lib/csum.c                |  5 +----
 5 files changed, 26 insertions(+), 4 deletions(-)
---
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
change-id: 20240131-disable_misaligned_probe_config-043aea375f93
-- 
- Charlie


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] riscv: lib: Introduce has_fast_misaligned_access function
  2024-02-01  6:40 [PATCH 0/2] riscv: Use CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to set misaligned access speed Charlie Jenkins
@ 2024-02-01  6:40 ` Charlie Jenkins
  2024-02-01  6:40 ` [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS Charlie Jenkins
  1 sibling, 0 replies; 9+ messages in thread
From: Charlie Jenkins @ 2024-02-01  6:40 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Jisheng Zhang, Evan Green
  Cc: linux-riscv, linux-kernel, Charlie Jenkins

Create has_fast_misaligned_access to avoid needing to explicitly check
the fast_misaligned_access_speed_key static key.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/include/asm/cpufeature.h | 6 ++++++
 arch/riscv/lib/csum.c               | 5 +----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 5a626ed2c47a..dfdcca229174 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -28,7 +28,9 @@ struct riscv_isainfo {
 
 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 DECLARE_PER_CPU(long, misaligned_access_speed);
+#endif
 
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
@@ -137,4 +139,8 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
 
 DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
 
+static __always_inline bool has_fast_misaligned_accesses(void)
+{
+	return static_branch_likely(&fast_misaligned_access_speed_key);
+}
 #endif
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index af3df5274ccb..399fa09bf4cb 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -318,10 +318,7 @@ unsigned int do_csum(const unsigned char *buff, int len)
 	 * branches. The largest chunk of overlap was delegated into the
 	 * do_csum_common function.
 	 */
-	if (static_branch_likely(&fast_misaligned_access_speed_key))
-		return do_csum_no_alignment(buff, len);
-
-	if (((unsigned long)buff & OFFSET_MASK) == 0)
+	if (has_fast_misaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
 		return do_csum_no_alignment(buff, len);
 
 	return do_csum_with_alignment(buff, len);

-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01  6:40 [PATCH 0/2] riscv: Use CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to set misaligned access speed Charlie Jenkins
  2024-02-01  6:40 ` [PATCH 1/2] riscv: lib: Introduce has_fast_misaligned_access function Charlie Jenkins
@ 2024-02-01  6:40 ` Charlie Jenkins
  2024-02-01 13:43   ` Clément Léger
  1 sibling, 1 reply; 9+ messages in thread
From: Charlie Jenkins @ 2024-02-01  6:40 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Jisheng Zhang, Evan Green
  Cc: linux-riscv, linux-kernel, Charlie Jenkins

When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
set to have fast misaligned access without needing to probe.
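
For reference, userspace observes the result of this change through the
hwprobe syscall. A minimal sketch of such a query (not part of this
patch; it assumes the uapi definitions from <asm/hwprobe.h> and kernel
headers recent enough to provide __NR_riscv_hwprobe):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>	/* struct riscv_hwprobe, RISCV_HWPROBE_* */

int main(void)
{
	struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };

	/* cpusetsize == 0 and cpus == NULL query all online CPUs. */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
		return 1;

	if ((pair.value & RISCV_HWPROBE_MISALIGNED_MASK) ==
	    RISCV_HWPROBE_MISALIGNED_FAST)
		puts("misaligned accesses reported as fast");
	return 0;
}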

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/include/asm/cpufeature.h  | 7 +++++++
 arch/riscv/kernel/cpufeature.c       | 4 ++++
 arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
 arch/riscv/kernel/traps_misaligned.c | 4 ++++
 4 files changed, 19 insertions(+)

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index dfdcca229174..7d8d64783e38 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
 	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
 
 static __always_inline bool has_fast_misaligned_accesses(void)
 {
 	return static_branch_likely(&fast_misaligned_access_speed_key);
 }
+#else
+static __always_inline bool has_fast_misaligned_accesses(void)
+{
+	return true;
+}
+#endif
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 89920f84d0a3..d787846c0b68 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 /* Per-cpu ISA extensions. */
 struct riscv_isainfo hart_isa[NR_CPUS];
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 /* Performance information */
 DEFINE_PER_CPU(long, misaligned_access_speed);
 
 static cpumask_t fast_misaligned_access;
+#endif
 
 /**
  * riscv_isa_extension_base() - Get base extension word
@@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
 	return hwcap;
 }
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 static int check_unaligned_access(void *param)
 {
 	int cpu = smp_processor_id();
@@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
 }
 
 arch_initcall(check_unaligned_access_all_cpus);
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
 
 void riscv_user_isa_enable(void)
 {
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index a7c56b41efd2..3f1a6edfdb08 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
 
 static u64 hwprobe_misaligned(const struct cpumask *cpus)
 {
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	int cpu;
 	u64 perf = -1ULL;
 
@@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
 		return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
 
 	return perf;
+#else
+	return RISCV_HWPROBE_MISALIGNED_FAST;
+#endif
 }
 
 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 8ded225e8c5b..c24f79d769f6 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
 
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+#endif
 
 	if (!unaligned_enabled)
 		return -1;
@@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
 	return 0;
 }
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 bool check_unaligned_access_emulated(int cpu)
 {
 	long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
@@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
 	}
 	unaligned_ctl = true;
 }
+#endif
 
 bool unaligned_ctl_available(void)
 {

-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01  6:40 ` [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS Charlie Jenkins
@ 2024-02-01 13:43   ` Clément Léger
  2024-02-01 19:10     ` Charlie Jenkins
  0 siblings, 1 reply; 9+ messages in thread
From: Clément Léger @ 2024-02-01 13:43 UTC (permalink / raw)
  To: Charlie Jenkins, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Jisheng Zhang, Evan Green
  Cc: linux-riscv, linux-kernel



On 01/02/2024 07:40, Charlie Jenkins wrote:
> When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> set to have fast misaligned access without needing to probe.
> 
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
>  arch/riscv/include/asm/cpufeature.h  | 7 +++++++
>  arch/riscv/kernel/cpufeature.c       | 4 ++++
>  arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
>  arch/riscv/kernel/traps_misaligned.c | 4 ++++
>  4 files changed, 19 insertions(+)
> 
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index dfdcca229174..7d8d64783e38 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
>  	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
>  }
>  
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
>  
>  static __always_inline bool has_fast_misaligned_accesses(void)
>  {
>  	return static_branch_likely(&fast_misaligned_access_speed_key);
>  }
> +#else
> +static __always_inline bool has_fast_misaligned_accesses(void)
> +{
> +	return true;
> +}
> +#endif
>  #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 89920f84d0a3..d787846c0b68 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
>  /* Per-cpu ISA extensions. */
>  struct riscv_isainfo hart_isa[NR_CPUS];
>  
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  /* Performance information */
>  DEFINE_PER_CPU(long, misaligned_access_speed);
>  
>  static cpumask_t fast_misaligned_access;
> +#endif
>  
>  /**
>   * riscv_isa_extension_base() - Get base extension word
> @@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
>  	return hwcap;
>  }
>  
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  static int check_unaligned_access(void *param)
>  {
>  	int cpu = smp_processor_id();
> @@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
>  }
>  
>  arch_initcall(check_unaligned_access_all_cpus);
> +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
>  
>  void riscv_user_isa_enable(void)
>  {

Hi Charlie,

Generally, having so many ifdefs in various pieces of code is probably
not a good idea.

AFAICT, if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the whole
misaligned access speed checking can be opted out, which means that
everything related to misaligned accesses should probably be moved into
its own file and built only for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=n.
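
Something along these lines, with hypothetical symbol and file names
just to sketch the idea (the Kconfig/Makefile side is shown as a
comment):

/*
 * arch/riscv/Kconfig (sketch):
 *	config RISCV_PROBE_UNALIGNED_ACCESS
 *		def_bool !HAVE_EFFICIENT_UNALIGNED_ACCESS
 *
 * arch/riscv/kernel/Makefile (sketch):
 *	obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += unaligned_access_speed.o
 */
#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
/* Probing code lives in its own file; the header only exposes the key. */
DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);

static __always_inline bool has_fast_misaligned_accesses(void)
{
	return static_branch_likely(&fast_misaligned_access_speed_key);
}
#else
/* No probe built in: accesses are assumed fast at compile time. */
static __always_inline bool has_fast_misaligned_accesses(void)
{
	return true;
}
#endif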

> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index a7c56b41efd2..3f1a6edfdb08 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
>  
>  static u64 hwprobe_misaligned(const struct cpumask *cpus)
>  {
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  	int cpu;
>  	u64 perf = -1ULL;
>  
> @@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
>  		return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
>  
>  	return perf;
> +#else
> +	return RISCV_HWPROBE_MISALIGNED_FAST;
> +#endif
>  }
>  
>  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> index 8ded225e8c5b..c24f79d769f6 100644
> --- a/arch/riscv/kernel/traps_misaligned.c
> +++ b/arch/riscv/kernel/traps_misaligned.c
> @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
>  
>  	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
>  
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  	*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> +#endif

I think that rather than using ifdefery inside this file
(traps_misaligned.c), it can be opted out entirely when we have
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, since that implies misaligned
accesses are not emulated (at least that is my understanding).

Thanks,

Clément


>  
>  	if (!unaligned_enabled)
>  		return -1;
> @@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
>  	return 0;
>  }
>  
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  bool check_unaligned_access_emulated(int cpu)
>  {
>  	long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
> @@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
>  	}
>  	unaligned_ctl = true;
>  }
> +#endif
>  
>  bool unaligned_ctl_available(void)
>  {
> 




^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01 13:43   ` Clément Léger
@ 2024-02-01 19:10     ` Charlie Jenkins
  2024-02-01 19:57       ` Charles Lohr
  0 siblings, 1 reply; 9+ messages in thread
From: Charlie Jenkins @ 2024-02-01 19:10 UTC (permalink / raw)
  To: Clément Léger
  Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Jisheng Zhang,
	Evan Green, linux-riscv, linux-kernel

On Thu, Feb 01, 2024 at 02:43:43PM +0100, Clément Léger wrote:
> 
> 
> On 01/02/2024 07:40, Charlie Jenkins wrote:
> > When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> > set to have fast misaligned access without needing to probe.
> > 
> > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > ---
> >  arch/riscv/include/asm/cpufeature.h  | 7 +++++++
> >  arch/riscv/kernel/cpufeature.c       | 4 ++++
> >  arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
> >  arch/riscv/kernel/traps_misaligned.c | 4 ++++
> >  4 files changed, 19 insertions(+)
> > 
> > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > index dfdcca229174..7d8d64783e38 100644
> > --- a/arch/riscv/include/asm/cpufeature.h
> > +++ b/arch/riscv/include/asm/cpufeature.h
> > @@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> >  	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> >  }
> >  
> > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> >  
> >  static __always_inline bool has_fast_misaligned_accesses(void)
> >  {
> >  	return static_branch_likely(&fast_misaligned_access_speed_key);
> >  }
> > +#else
> > +static __always_inline bool has_fast_misaligned_accesses(void)
> > +{
> > +	return true;
> > +}
> > +#endif
> >  #endif
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index 89920f84d0a3..d787846c0b68 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> >  /* Per-cpu ISA extensions. */
> >  struct riscv_isainfo hart_isa[NR_CPUS];
> >  
> > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >  /* Performance information */
> >  DEFINE_PER_CPU(long, misaligned_access_speed);
> >  
> >  static cpumask_t fast_misaligned_access;
> > +#endif
> >  
> >  /**
> >   * riscv_isa_extension_base() - Get base extension word
> > @@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
> >  	return hwcap;
> >  }
> >  
> > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >  static int check_unaligned_access(void *param)
> >  {
> >  	int cpu = smp_processor_id();
> > @@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
> >  }
> >  
> >  arch_initcall(check_unaligned_access_all_cpus);
> > +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
> >  
> >  void riscv_user_isa_enable(void)
> >  {
> 
> Hi Charlie,
> 
> Generally, having so many ifdefs in various pieces of code is probably
> not a good idea.
> 
> AFAICT, if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the whole
> misaligned access speed checking can be opted out, which means that
> everything related to misaligned accesses should probably be moved into
> its own file and built only for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=n.

I will look into doing something more clever here! I agree it is not
very nice to have so many ifdefs scattered.

> 
> > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> > index a7c56b41efd2..3f1a6edfdb08 100644
> > --- a/arch/riscv/kernel/sys_hwprobe.c
> > +++ b/arch/riscv/kernel/sys_hwprobe.c
> > @@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
> >  
> >  static u64 hwprobe_misaligned(const struct cpumask *cpus)
> >  {
> > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >  	int cpu;
> >  	u64 perf = -1ULL;
> >  
> > @@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
> >  		return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> >  
> >  	return perf;
> > +#else
> > +	return RISCV_HWPROBE_MISALIGNED_FAST;
> > +#endif
> >  }
> >  
> >  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> > diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> > index 8ded225e8c5b..c24f79d769f6 100644
> > --- a/arch/riscv/kernel/traps_misaligned.c
> > +++ b/arch/riscv/kernel/traps_misaligned.c
> > @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> >  
> >  	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
> >  
> > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >  	*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> > +#endif
> 
> I think that rather than using ifdefery inside this file
> (traps_misaligned.c), it can be opted out entirely when we have
> CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, since that implies misaligned
> accesses are not emulated (at least that is my understanding).
> 

That's a great idea, I believe that is correct.

- Charlie

> Thanks,
> 
> Clément
> 
> 
> >  
> >  	if (!unaligned_enabled)
> >  		return -1;
> > @@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
> >  	return 0;
> >  }
> >  
> > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >  bool check_unaligned_access_emulated(int cpu)
> >  {
> >  	long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
> > @@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
> >  	}
> >  	unaligned_ctl = true;
> >  }
> > +#endif
> >  
> >  bool unaligned_ctl_available(void)
> >  {
> > 
> 
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01 19:10     ` Charlie Jenkins
@ 2024-02-01 19:57       ` Charles Lohr
  2024-02-01 20:47         ` Charlie Jenkins
  0 siblings, 1 reply; 9+ messages in thread
From: Charles Lohr @ 2024-02-01 19:57 UTC (permalink / raw)
  To: Charlie Jenkins
  Cc: Clément Léger, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Jisheng Zhang, Evan Green, linux-riscv, linux-kernel

I am a little confused here - I was testing with 6.8-rc1 and it didn't
seem to have the behavior of performing the probe (The probe kills
boot performance in my application and I've had to patch out the probe
in mid-6.x kernels).

Did something get reverted to bring back the probe even when
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=Y between rc1 and trunk?  Or am
I misremembering/accidentally patched?

On Thu, Feb 1, 2024 at 11:10 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> On Thu, Feb 01, 2024 at 02:43:43PM +0100, Clément Léger wrote:
> >
> >
> > On 01/02/2024 07:40, Charlie Jenkins wrote:
> > > When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> > > set to have fast misaligned access without needing to probe.
> > >
> > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > ---
> > >  arch/riscv/include/asm/cpufeature.h  | 7 +++++++
> > >  arch/riscv/kernel/cpufeature.c       | 4 ++++
> > >  arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
> > >  arch/riscv/kernel/traps_misaligned.c | 4 ++++
> > >  4 files changed, 19 insertions(+)
> > >
> > > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > > index dfdcca229174..7d8d64783e38 100644
> > > --- a/arch/riscv/include/asm/cpufeature.h
> > > +++ b/arch/riscv/include/asm/cpufeature.h
> > > @@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> > >     return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > >  }
> > >
> > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > >  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > >
> > >  static __always_inline bool has_fast_misaligned_accesses(void)
> > >  {
> > >     return static_branch_likely(&fast_misaligned_access_speed_key);
> > >  }
> > > +#else
> > > +static __always_inline bool has_fast_misaligned_accesses(void)
> > > +{
> > > +   return true;
> > > +}
> > > +#endif
> > >  #endif
> > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > index 89920f84d0a3..d787846c0b68 100644
> > > --- a/arch/riscv/kernel/cpufeature.c
> > > +++ b/arch/riscv/kernel/cpufeature.c
> > > @@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> > >  /* Per-cpu ISA extensions. */
> > >  struct riscv_isainfo hart_isa[NR_CPUS];
> > >
> > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > >  /* Performance information */
> > >  DEFINE_PER_CPU(long, misaligned_access_speed);
> > >
> > >  static cpumask_t fast_misaligned_access;
> > > +#endif
> > >
> > >  /**
> > >   * riscv_isa_extension_base() - Get base extension word
> > > @@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
> > >     return hwcap;
> > >  }
> > >
> > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > >  static int check_unaligned_access(void *param)
> > >  {
> > >     int cpu = smp_processor_id();
> > > @@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
> > >  }
> > >
> > >  arch_initcall(check_unaligned_access_all_cpus);
> > > +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
> > >
> > >  void riscv_user_isa_enable(void)
> > >  {
> >
> > Hi Charlie,
> >
> > Generally, having so many ifdefs in various pieces of code is probably
> > not a good idea.
> >
> > AFAICT, if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the whole
> > misaligned access speed checking can be opted out, which means that
> > everything related to misaligned accesses should probably be moved into
> > its own file and built only for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=n.
>
> I will look into doing something more clever here! I agree it is not
> very nice to have so many ifdefs scattered.
>
> >
> > > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> > > index a7c56b41efd2..3f1a6edfdb08 100644
> > > --- a/arch/riscv/kernel/sys_hwprobe.c
> > > +++ b/arch/riscv/kernel/sys_hwprobe.c
> > > @@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
> > >
> > >  static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > >  {
> > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > >     int cpu;
> > >     u64 perf = -1ULL;
> > >
> > > @@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > >             return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> > >
> > >     return perf;
> > > +#else
> > > +   return RISCV_HWPROBE_MISALIGNED_FAST;
> > > +#endif
> > >  }
> > >
> > >  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> > > diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> > > index 8ded225e8c5b..c24f79d769f6 100644
> > > --- a/arch/riscv/kernel/traps_misaligned.c
> > > +++ b/arch/riscv/kernel/traps_misaligned.c
> > > @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> > >
> > >     perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
> > >
> > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > >     *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> > > +#endif
> >
> > I think that rather than using ifdefery inside this file
> > (traps_misaligned.c), it can be opted out entirely when we have
> > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, since that implies misaligned
> > accesses are not emulated (at least that is my understanding).
> >
>
> That's a great idea, I believe that is correct.
>
> - Charlie
>
> > Thanks,
> >
> > Clément
> >
> >
> > >
> > >     if (!unaligned_enabled)
> > >             return -1;
> > > @@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
> > >     return 0;
> > >  }
> > >
> > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > >  bool check_unaligned_access_emulated(int cpu)
> > >  {
> > >     long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
> > > @@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
> > >     }
> > >     unaligned_ctl = true;
> > >  }
> > > +#endif
> > >
> > >  bool unaligned_ctl_available(void)
> > >  {
> > >
> >
> >
> >
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01 19:57       ` Charles Lohr
@ 2024-02-01 20:47         ` Charlie Jenkins
  2024-02-01 21:39           ` Charles Lohr
  0 siblings, 1 reply; 9+ messages in thread
From: Charlie Jenkins @ 2024-02-01 20:47 UTC (permalink / raw)
  To: Charles Lohr
  Cc: Clément Léger, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Jisheng Zhang, Evan Green, linux-riscv, linux-kernel

On Thu, Feb 01, 2024 at 11:57:04AM -0800, Charles Lohr wrote:
> I am a little confused here - I was testing with 6.8-rc1 and it didn't
> seem to have the behavior of performing the probe (The probe kills
> boot performance in my application and I've had to patch out the probe
> in mid-6.x kernels).
> 
> Did something get reverted to bring back the probe even when
> CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=Y between rc1 and trunk?  Or am
> I misremembering/accidentally patched?

After pulling a clean version of 6.8-rc1 and setting
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS I still see the probe happen.
Before sending this I looked for a patch that disabled the probe but was
unable to find one; if such a patch exists, can you point me to it?

- Charlie

> 
> On Thu, Feb 1, 2024 at 11:10 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > On Thu, Feb 01, 2024 at 02:43:43PM +0100, Clément Léger wrote:
> > >
> > >
> > > On 01/02/2024 07:40, Charlie Jenkins wrote:
> > > > When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> > > > set to have fast misaligned access without needing to probe.
> > > >
> > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > ---
> > > >  arch/riscv/include/asm/cpufeature.h  | 7 +++++++
> > > >  arch/riscv/kernel/cpufeature.c       | 4 ++++
> > > >  arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
> > > >  arch/riscv/kernel/traps_misaligned.c | 4 ++++
> > > >  4 files changed, 19 insertions(+)
> > > >
> > > > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > > > index dfdcca229174..7d8d64783e38 100644
> > > > --- a/arch/riscv/include/asm/cpufeature.h
> > > > +++ b/arch/riscv/include/asm/cpufeature.h
> > > > @@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> > > >     return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > > >  }
> > > >
> > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > >  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > > >
> > > >  static __always_inline bool has_fast_misaligned_accesses(void)
> > > >  {
> > > >     return static_branch_likely(&fast_misaligned_access_speed_key);
> > > >  }
> > > > +#else
> > > > +static __always_inline bool has_fast_misaligned_accesses(void)
> > > > +{
> > > > +   return true;
> > > > +}
> > > > +#endif
> > > >  #endif
> > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > > index 89920f84d0a3..d787846c0b68 100644
> > > > --- a/arch/riscv/kernel/cpufeature.c
> > > > +++ b/arch/riscv/kernel/cpufeature.c
> > > > @@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> > > >  /* Per-cpu ISA extensions. */
> > > >  struct riscv_isainfo hart_isa[NR_CPUS];
> > > >
> > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > >  /* Performance information */
> > > >  DEFINE_PER_CPU(long, misaligned_access_speed);
> > > >
> > > >  static cpumask_t fast_misaligned_access;
> > > > +#endif
> > > >
> > > >  /**
> > > >   * riscv_isa_extension_base() - Get base extension word
> > > > @@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
> > > >     return hwcap;
> > > >  }
> > > >
> > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > >  static int check_unaligned_access(void *param)
> > > >  {
> > > >     int cpu = smp_processor_id();
> > > > @@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
> > > >  }
> > > >
> > > >  arch_initcall(check_unaligned_access_all_cpus);
> > > > +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
> > > >
> > > >  void riscv_user_isa_enable(void)
> > > >  {
> > >
> > > Hi Charlie,
> > >
> > > Generally, having so many ifdefs in various pieces of code is probably
> > > not a good idea.
> > >
> > > AFAICT, if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the whole
> > > misaligned access speed checking can be opted out, which means that
> > > everything related to misaligned accesses should probably be moved into
> > > its own file and built only for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=n.
> >
> > I will look into doing something more clever here! I agree it is not
> > very nice to have so many ifdefs scattered.
> >
> > >
> > > > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> > > > index a7c56b41efd2..3f1a6edfdb08 100644
> > > > --- a/arch/riscv/kernel/sys_hwprobe.c
> > > > +++ b/arch/riscv/kernel/sys_hwprobe.c
> > > > @@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
> > > >
> > > >  static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > > >  {
> > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > >     int cpu;
> > > >     u64 perf = -1ULL;
> > > >
> > > > @@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > > >             return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> > > >
> > > >     return perf;
> > > > +#else
> > > > +   return RISCV_HWPROBE_MISALIGNED_FAST;
> > > > +#endif
> > > >  }
> > > >
> > > >  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> > > > diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> > > > index 8ded225e8c5b..c24f79d769f6 100644
> > > > --- a/arch/riscv/kernel/traps_misaligned.c
> > > > +++ b/arch/riscv/kernel/traps_misaligned.c
> > > > @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> > > >
> > > >     perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
> > > >
> > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > >     *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> > > > +#endif
> > >
> > > I think that rather than using ifdefery inside this file
> > > (traps_misaligned.c), it can be opted out entirely when we have
> > > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, since that implies misaligned
> > > accesses are not emulated (at least that is my understanding).
> > >
> >
> > That's a great idea, I believe that is correct.
> >
> > - Charlie
> >
> > > Thanks,
> > >
> > > Clément
> > >
> > >
> > > >
> > > >     if (!unaligned_enabled)
> > > >             return -1;
> > > > @@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
> > > >     return 0;
> > > >  }
> > > >
> > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > >  bool check_unaligned_access_emulated(int cpu)
> > > >  {
> > > >     long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
> > > > @@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
> > > >     }
> > > >     unaligned_ctl = true;
> > > >  }
> > > > +#endif
> > > >
> > > >  bool unaligned_ctl_available(void)
> > > >  {
> > > >
> > >
> > >
> > >
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01 20:47         ` Charlie Jenkins
@ 2024-02-01 21:39           ` Charles Lohr
  2024-02-01 21:49             ` Charlie Jenkins
  0 siblings, 1 reply; 9+ messages in thread
From: Charles Lohr @ 2024-02-01 21:39 UTC (permalink / raw)
  To: Charlie Jenkins
  Cc: Clément Léger, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Jisheng Zhang, Evan Green, linux-riscv, linux-kernel

I am very sorry for wasting your time - I did have it patched out in
the build system here. I can't wait for this feature to land, so I can
enjoy faster boot times without a patch.

Charles

On Thu, Feb 1, 2024 at 12:47 PM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> On Thu, Feb 01, 2024 at 11:57:04AM -0800, Charles Lohr wrote:
> > I am a little confused here - I was testing with 6.8-rc1 and it didn't
> > seem to have the behavior of performing the probe (The probe kills
> > boot performance in my application and I've had to patch out the probe
> > in mid-6.x kernels).
> >
> > Did something get reverted to bring back the probe even when
> > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=Y between rc1 and trunk?  Or am
> > I misremembering/accidentally patched?
>
> After pulling a clean version of 6.8-rc1 and setting
> CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS I still see the probe happen.
> Before sending this I looked for a patch that disabled the probe but was
> unable to find one; if such a patch exists, can you point me to it?
>
> - Charlie
>
> >
> > On Thu, Feb 1, 2024 at 11:10 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> > >
> > > On Thu, Feb 01, 2024 at 02:43:43PM +0100, Clément Léger wrote:
> > > >
> > > >
> > > > On 01/02/2024 07:40, Charlie Jenkins wrote:
> > > > > When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> > > > > set to have fast misaligned access without needing to probe.
> > > > >
> > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > ---
> > > > >  arch/riscv/include/asm/cpufeature.h  | 7 +++++++
> > > > >  arch/riscv/kernel/cpufeature.c       | 4 ++++
> > > > >  arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
> > > > >  arch/riscv/kernel/traps_misaligned.c | 4 ++++
> > > > >  4 files changed, 19 insertions(+)
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > > > > index dfdcca229174..7d8d64783e38 100644
> > > > > --- a/arch/riscv/include/asm/cpufeature.h
> > > > > +++ b/arch/riscv/include/asm/cpufeature.h
> > > > > @@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> > > > >     return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > > > >  }
> > > > >
> > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > >  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > > > >
> > > > >  static __always_inline bool has_fast_misaligned_accesses(void)
> > > > >  {
> > > > >     return static_branch_likely(&fast_misaligned_access_speed_key);
> > > > >  }
> > > > > +#else
> > > > > +static __always_inline bool has_fast_misaligned_accesses(void)
> > > > > +{
> > > > > +   return true;
> > > > > +}
> > > > > +#endif
> > > > >  #endif
> > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > > > index 89920f84d0a3..d787846c0b68 100644
> > > > > --- a/arch/riscv/kernel/cpufeature.c
> > > > > +++ b/arch/riscv/kernel/cpufeature.c
> > > > > @@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> > > > >  /* Per-cpu ISA extensions. */
> > > > >  struct riscv_isainfo hart_isa[NR_CPUS];
> > > > >
> > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > >  /* Performance information */
> > > > >  DEFINE_PER_CPU(long, misaligned_access_speed);
> > > > >
> > > > >  static cpumask_t fast_misaligned_access;
> > > > > +#endif
> > > > >
> > > > >  /**
> > > > >   * riscv_isa_extension_base() - Get base extension word
> > > > > @@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
> > > > >     return hwcap;
> > > > >  }
> > > > >
> > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > >  static int check_unaligned_access(void *param)
> > > > >  {
> > > > >     int cpu = smp_processor_id();
> > > > > @@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
> > > > >  }
> > > > >
> > > > >  arch_initcall(check_unaligned_access_all_cpus);
> > > > > +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
> > > > >
> > > > >  void riscv_user_isa_enable(void)
> > > > >  {
> > > >
> > > > Hi Charlie,
> > > >
> > > > Generally, having so many ifdefs in various pieces of code is probably
> > > > not a good idea.
> > > >
> > > > AFAICT, if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the whole
> > > > misaligned access speed checking can be opted out, which means that
> > > > everything related to misaligned accesses should probably be moved into
> > > > its own file and built only for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=n.
> > >
> > > I will look into doing something more clever here! I agree it is not
> > > very nice to have so many ifdefs scattered.
> > >
> > > >
> > > > > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> > > > > index a7c56b41efd2..3f1a6edfdb08 100644
> > > > > --- a/arch/riscv/kernel/sys_hwprobe.c
> > > > > +++ b/arch/riscv/kernel/sys_hwprobe.c
> > > > > @@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
> > > > >
> > > > >  static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > > > >  {
> > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > >     int cpu;
> > > > >     u64 perf = -1ULL;
> > > > >
> > > > > @@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > > > >             return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> > > > >
> > > > >     return perf;
> > > > > +#else
> > > > > +   return RISCV_HWPROBE_MISALIGNED_FAST;
> > > > > +#endif
> > > > >  }
> > > > >
> > > > >  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> > > > > diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> > > > > index 8ded225e8c5b..c24f79d769f6 100644
> > > > > --- a/arch/riscv/kernel/traps_misaligned.c
> > > > > +++ b/arch/riscv/kernel/traps_misaligned.c
> > > > > @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> > > > >
> > > > >     perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
> > > > >
> > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > >     *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> > > > > +#endif
> > > >
> > > > I think that rather than using ifdefery inside this file
> > > > (traps_misaligned.c), it can be opted out entirely when we have
> > > > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, since that implies misaligned
> > > > accesses are not emulated (at least that is my understanding).
> > > >
> > >
> > > That's a great idea, I believe that is correct.
> > >
> > > - Charlie
> > >
> > > > Thanks,
> > > >
> > > > Clément
> > > >
> > > >
> > > > >
> > > > >     if (!unaligned_enabled)
> > > > >             return -1;
> > > > > @@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
> > > > >     return 0;
> > > > >  }
> > > > >
> > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > >  bool check_unaligned_access_emulated(int cpu)
> > > > >  {
> > > > >     long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
> > > > > @@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
> > > > >     }
> > > > >     unaligned_ctl = true;
> > > > >  }
> > > > > +#endif
> > > > >
> > > > >  bool unaligned_ctl_available(void)
> > > > >  {
> > > > >
> > > >
> > > >
> > > >
> > >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  2024-02-01 21:39           ` Charles Lohr
@ 2024-02-01 21:49             ` Charlie Jenkins
  0 siblings, 0 replies; 9+ messages in thread
From: Charlie Jenkins @ 2024-02-01 21:49 UTC (permalink / raw)
  To: Charles Lohr
  Cc: Clément Léger, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Jisheng Zhang, Evan Green, linux-riscv, linux-kernel

On Thu, Feb 01, 2024 at 01:39:53PM -0800, Charles Lohr wrote:
> I am very sorry for wasting your time - I did have it patched out in
> the build system here. I can't wait for this feature to land, so I can
> enjoy faster boot times without a patch.
> 
> Charles

No worries!

- Charlie

> 
> On Thu, Feb 1, 2024 at 12:47 PM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > On Thu, Feb 01, 2024 at 11:57:04AM -0800, Charles Lohr wrote:
> > > I am a little confused here - I was testing with 6.8-rc1 and it didn't
> > > seem to have the behavior of performing the probe (The probe kills
> > > boot performance in my application and I've had to patch out the probe
> > > in mid-6.x kernels).
> > >
> > > Did something get reverted to bring back the probe even when
> > > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=Y between rc1 and trunk?  Or am
> > > I misremembering/accidentally patched?
> >
> > After pulling a clean version of 6.8-rc1 and setting
> > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS I still see the probe happen.
> > Before sending this I looked for a patch that disabled the probe but was
> > unable to find one; if such a patch exists, can you point me to it?
> >
> > - Charlie
> >
> > >
> > > On Thu, Feb 1, 2024 at 11:10 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> > > >
> > > > On Thu, Feb 01, 2024 at 02:43:43PM +0100, Clément Léger wrote:
> > > > >
> > > > >
> > > > > On 01/02/2024 07:40, Charlie Jenkins wrote:
> > > > > > When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> > > > > > set to have fast misaligned access without needing to probe.
> > > > > >
> > > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > > ---
> > > > > >  arch/riscv/include/asm/cpufeature.h  | 7 +++++++
> > > > > >  arch/riscv/kernel/cpufeature.c       | 4 ++++
> > > > > >  arch/riscv/kernel/sys_hwprobe.c      | 4 ++++
> > > > > >  arch/riscv/kernel/traps_misaligned.c | 4 ++++
> > > > > >  4 files changed, 19 insertions(+)
> > > > > >
> > > > > > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > > > > > index dfdcca229174..7d8d64783e38 100644
> > > > > > --- a/arch/riscv/include/asm/cpufeature.h
> > > > > > +++ b/arch/riscv/include/asm/cpufeature.h
> > > > > > @@ -137,10 +137,17 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> > > > > >     return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > > > > >  }
> > > > > >
> > > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > > >  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > > > > >
> > > > > >  static __always_inline bool has_fast_misaligned_accesses(void)
> > > > > >  {
> > > > > >     return static_branch_likely(&fast_misaligned_access_speed_key);
> > > > > >  }
> > > > > > +#else
> > > > > > +static __always_inline bool has_fast_misaligned_accesses(void)
> > > > > > +{
> > > > > > +   return true;
> > > > > > +}
> > > > > > +#endif
> > > > > >  #endif
> > > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > > > > index 89920f84d0a3..d787846c0b68 100644
> > > > > > --- a/arch/riscv/kernel/cpufeature.c
> > > > > > +++ b/arch/riscv/kernel/cpufeature.c
> > > > > > @@ -43,10 +43,12 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> > > > > >  /* Per-cpu ISA extensions. */
> > > > > >  struct riscv_isainfo hart_isa[NR_CPUS];
> > > > > >
> > > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > > >  /* Performance information */
> > > > > >  DEFINE_PER_CPU(long, misaligned_access_speed);
> > > > > >
> > > > > >  static cpumask_t fast_misaligned_access;
> > > > > > +#endif
> > > > > >
> > > > > >  /**
> > > > > >   * riscv_isa_extension_base() - Get base extension word
> > > > > > @@ -706,6 +708,7 @@ unsigned long riscv_get_elf_hwcap(void)
> > > > > >     return hwcap;
> > > > > >  }
> > > > > >
> > > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > > >  static int check_unaligned_access(void *param)
> > > > > >  {
> > > > > >     int cpu = smp_processor_id();
> > > > > > @@ -946,6 +949,7 @@ static int check_unaligned_access_all_cpus(void)
> > > > > >  }
> > > > > >
> > > > > >  arch_initcall(check_unaligned_access_all_cpus);
> > > > > > +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
> > > > > >
> > > > > >  void riscv_user_isa_enable(void)
> > > > > >  {
> > > > >
> > > > > Hi Charlie,
> > > > >
> > > > > Generally, having so many ifdefs in various pieces of code is probably
> > > > > not a good idea.
> > > > >
> > > > > AFAICT, if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the whole
> > > > > misaligned access speed checking can be opted out, which means that
> > > > > everything related to misaligned accesses should probably be moved into
> > > > > its own file and built only for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=n.
> > > >
> > > > I will look into doing something more clever here! I agree it is not
> > > > very nice to have so many ifdefs scattered.
> > > >
> > > > >
> > > > > > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> > > > > > index a7c56b41efd2..3f1a6edfdb08 100644
> > > > > > --- a/arch/riscv/kernel/sys_hwprobe.c
> > > > > > +++ b/arch/riscv/kernel/sys_hwprobe.c
> > > > > > @@ -149,6 +149,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
> > > > > >
> > > > > >  static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > > > > >  {
> > > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > > >     int cpu;
> > > > > >     u64 perf = -1ULL;
> > > > > >
> > > > > > @@ -168,6 +169,9 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
> > > > > >             return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> > > > > >
> > > > > >     return perf;
> > > > > > +#else
> > > > > > +   return RISCV_HWPROBE_MISALIGNED_FAST;
> > > > > > +#endif
> > > > > >  }
> > > > > >
> > > > > >  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> > > > > > diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> > > > > > index 8ded225e8c5b..c24f79d769f6 100644
> > > > > > --- a/arch/riscv/kernel/traps_misaligned.c
> > > > > > +++ b/arch/riscv/kernel/traps_misaligned.c
> > > > > > @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> > > > > >
> > > > > >     perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
> > > > > >
> > > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > > >     *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> > > > > > +#endif
> > > > >
> > > > > I think that rather than using ifdefery inside this file
> > > > > (traps_misaligned.c), it can be opted out entirely when we have
> > > > > CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, since that implies misaligned
> > > > > accesses are not emulated (at least that is my understanding).
> > > > >
> > > >
> > > > That's a great idea, I believe that is correct.
> > > >
> > > > - Charlie
> > > >
> > > > > Thanks,
> > > > >
> > > > > Clément
> > > > >
> > > > >
> > > > > >
> > > > > >     if (!unaligned_enabled)
> > > > > >             return -1;
> > > > > > @@ -596,6 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
> > > > > >     return 0;
> > > > > >  }
> > > > > >
> > > > > > +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> > > > > >  bool check_unaligned_access_emulated(int cpu)
> > > > > >  {
> > > > > >     long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
> > > > > > @@ -640,6 +643,7 @@ void unaligned_emulation_finish(void)
> > > > > >     }
> > > > > >     unaligned_ctl = true;
> > > > > >  }
> > > > > > +#endif
> > > > > >
> > > > > >  bool unaligned_ctl_available(void)
> > > > > >  {
> > > > > >
> > > > >
> > > > >
> > > > >
> > > >

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-02-01 21:49 UTC | newest]

Thread overview: 9+ messages
2024-02-01  6:40 [PATCH 0/2] riscv: Use CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to set misaligned access speed Charlie Jenkins
2024-02-01  6:40 ` [PATCH 1/2] riscv: lib: Introduce has_fast_misaligned_access function Charlie Jenkins
2024-02-01  6:40 ` [PATCH 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS Charlie Jenkins
2024-02-01 13:43   ` Clément Léger
2024-02-01 19:10     ` Charlie Jenkins
2024-02-01 19:57       ` Charles Lohr
2024-02-01 20:47         ` Charlie Jenkins
2024-02-01 21:39           ` Charles Lohr
2024-02-01 21:49             ` Charlie Jenkins
