All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-01 15:29 Ayyappa Ch
  2015-05-06 19:38   ` Anders Roxell
  2015-05-21 13:50   ` Anders Roxell
  0 siblings, 2 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-01 15:29 UTC (permalink / raw)
  To: linux-arm-kernel, Anders Roxell, Sebastian Andrzej Siewior,
	linux-rt-users
  Cc: kevin.hilman

Floating point operations in arm64 should not disable preempt .
Activating realtime features with below code.

>From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
From: Ayyappa Ch <ayyappa.chandolu@amd.com>
Date: Tue, 28 Apr 2015 11:53:00 +0530
Subject: [PATCH ] floating point realtime fix

---
 arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2438497..3dca156 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
  */
 void fpsimd_preserve_current_state(void)
 {
- preempt_disable();
+ migrate_disable();
  if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
  fpsimd_save_state(&current->thread.fpsimd_state);
- preempt_enable();
+ migrate_enable();
 }

 /*
@@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
  */
 void fpsimd_restore_current_state(void)
 {
- preempt_disable();
+ migrate_disable();
  if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
  struct fpsimd_state *st = &current->thread.fpsimd_state;

@@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
  this_cpu_write(fpsimd_last_state, st);
  st->cpu = smp_processor_id();
  }
- preempt_enable();
+ migrate_enable();
 }

 /*
@@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
  */
 void fpsimd_update_current_state(struct fpsimd_state *state)
 {
- preempt_disable();
+ migrate_disable();
  fpsimd_load_state(state);
  if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
  struct fpsimd_state *st = &current->thread.fpsimd_state;
@@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
  this_cpu_write(fpsimd_last_state, st);
  st->cpu = smp_processor_id();
  }
- preempt_enable();
+ migrate_enable();
 }

 /*
@@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
  * that there is no longer userland FPSIMD state in the
  * registers.
  */
- preempt_disable();
+ migrate_disable();
  if (current->mm &&
     !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
  fpsimd_save_state(&current->thread.fpsimd_state);
@@ -255,7 +255,7 @@ void kernel_neon_end(void)
  in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
  fpsimd_load_partial_state(s);
  } else {
- preempt_enable();
+ migrate_enable();
  }
 }
 EXPORT_SYMBOL(kernel_neon_end);

On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> one issue with Cyclic test while running floating point operations. So
> , modified below changes for that.
>
> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> Date: Tue, 28 Apr 2015 11:53:00 +0530
> Subject: [PATCH ] floating point realtime fix
>
> ---
>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 2438497..3dca156 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>   */
>  void fpsimd_preserve_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> - preempt_enable();
> + migrate_enable();
>  }
>
>  /*
> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>   */
>  void fpsimd_restore_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>
> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
>
>  /*
> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>   */
>  void fpsimd_update_current_state(struct fpsimd_state *state)
>  {
> - preempt_disable();
> + migrate_disable();
>   fpsimd_load_state(state);
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
>
>  /*
> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>   * that there is no longer userland FPSIMD state in the
>   * registers.
>   */
> - preempt_disable();
> + migrate_disable();
>   if (current->mm &&
>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>   fpsimd_load_partial_state(s);
>   } else {
> - preempt_enable();
> + migrate_enable();
>   }
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
> --
> 1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-01 15:29 [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd Ayyappa Ch
@ 2015-05-06 19:38   ` Anders Roxell
  2015-05-21 13:50   ` Anders Roxell
  1 sibling, 0 replies; 31+ messages in thread
From: Anders Roxell @ 2015-05-06 19:38 UTC (permalink / raw)
  To: Ayyappa Ch
  Cc: linux-arm-kernel, Sebastian Andrzej Siewior, linux-rt-users,
	kevin.hilman

On 2015-05-01 20:59, Ayyappa Ch wrote:
> Floating point operations in arm64 should not disable preempt .

I thought that I would see a problem if I ran a floating point
workload and cyclictest at the same time but I don't.

Can you please provide me some more details on how I can reproduce the
problem?

Cheers,
Anders

> Activating realtime features with below code.
> 
> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> Date: Tue, 28 Apr 2015 11:53:00 +0530
> Subject: [PATCH ] floating point realtime fix
> 
> ---
>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 2438497..3dca156 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>   */
>  void fpsimd_preserve_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>   */
>  void fpsimd_restore_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> 
> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>   */
>  void fpsimd_update_current_state(struct fpsimd_state *state)
>  {
> - preempt_disable();
> + migrate_disable();
>   fpsimd_load_state(state);
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>   * that there is no longer userland FPSIMD state in the
>   * registers.
>   */
> - preempt_disable();
> + migrate_disable();
>   if (current->mm &&
>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>   fpsimd_load_partial_state(s);
>   } else {
> - preempt_enable();
> + migrate_enable();
>   }
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
> 
> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> > one issue with Cyclic test while running floating point operations. So
> > , modified below changes for that.
> >
> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> > Date: Tue, 28 Apr 2015 11:53:00 +0530
> > Subject: [PATCH ] floating point realtime fix
> >
> > ---
> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index 2438497..3dca156 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >   */
> >  void fpsimd_preserve_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >   */
> >  void fpsimd_restore_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >
> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >   */
> >  void fpsimd_update_current_state(struct fpsimd_state *state)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   fpsimd_load_state(state);
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >   * that there is no longer userland FPSIMD state in the
> >   * registers.
> >   */
> > - preempt_disable();
> > + migrate_disable();
> >   if (current->mm &&
> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >   fpsimd_load_partial_state(s);
> >   } else {
> > - preempt_enable();
> > + migrate_enable();
> >   }
> >  }
> >  EXPORT_SYMBOL(kernel_neon_end);
> > --
> > 1.9.1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-06 19:38   ` Anders Roxell
  0 siblings, 0 replies; 31+ messages in thread
From: Anders Roxell @ 2015-05-06 19:38 UTC (permalink / raw)
  To: linux-arm-kernel

On 2015-05-01 20:59, Ayyappa Ch wrote:
> Floating point operations in arm64 should not disable preempt .

I thought that I would see a problem if I ran a floating point
workload and cyclictest at the same time but I don't.

Can you please provide me some more details on how I can reproduce the
problem?

Cheers,
Anders

> Activating realtime features with below code.
> 
> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> Date: Tue, 28 Apr 2015 11:53:00 +0530
> Subject: [PATCH ] floating point realtime fix
> 
> ---
>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 2438497..3dca156 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>   */
>  void fpsimd_preserve_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>   */
>  void fpsimd_restore_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> 
> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>   */
>  void fpsimd_update_current_state(struct fpsimd_state *state)
>  {
> - preempt_disable();
> + migrate_disable();
>   fpsimd_load_state(state);
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>   * that there is no longer userland FPSIMD state in the
>   * registers.
>   */
> - preempt_disable();
> + migrate_disable();
>   if (current->mm &&
>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>   fpsimd_load_partial_state(s);
>   } else {
> - preempt_enable();
> + migrate_enable();
>   }
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
> 
> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> > one issue with Cyclic test while running floating point operations. So
> > , modified below changes for that.
> >
> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> > Date: Tue, 28 Apr 2015 11:53:00 +0530
> > Subject: [PATCH ] floating point realtime fix
> >
> > ---
> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index 2438497..3dca156 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >   */
> >  void fpsimd_preserve_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >   */
> >  void fpsimd_restore_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >
> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >   */
> >  void fpsimd_update_current_state(struct fpsimd_state *state)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   fpsimd_load_state(state);
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >   * that there is no longer userland FPSIMD state in the
> >   * registers.
> >   */
> > - preempt_disable();
> > + migrate_disable();
> >   if (current->mm &&
> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >   fpsimd_load_partial_state(s);
> >   } else {
> > - preempt_enable();
> > + migrate_enable();
> >   }
> >  }
> >  EXPORT_SYMBOL(kernel_neon_end);
> > --
> > 1.9.1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-06 19:38   ` Anders Roxell
@ 2015-05-07 11:09     ` Ayyappa Ch
  -1 siblings, 0 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-07 11:09 UTC (permalink / raw)
  To: Anders Roxell
  Cc: linux-arm-kernel, Sebastian Andrzej Siewior, linux-rt-users,
	Kevin Hilman

./cyclictest -p 80 -t5 -n  => Cyclic test is failing after 6Lack iterations
T: 0 ( 1364) P:99 I:1000 C: 668664 Min:      3 Act:    5 Avg:   11 Max:  100116

After analyzing the trace log , we observed the sudden change in
latency due to calling of fpsimd_preserve_current_state function.

Line 149766:     ntpd-925     1....1.. 512046976us :
fpsimd_preserve_current_state <-do_signal
Line 149768:     ntpd-925     1....1.. 512046977us : preempt_count_add
<-fpsimd_preserve_current_state
Line 149770:     ntpd-925     1....2.. 512046978us : preempt_count_sub
<-fpsimd_preserve_current_state

.......
Line 163170: cyclicte-964     0.....11 512065181us :
tracing_mark_write: hit latency threshold (98383 > 500)


Thanks and regards,
Ayyappa.Ch

...

On Thu, May 7, 2015 at 1:08 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
> On 2015-05-01 20:59, Ayyappa Ch wrote:
>> Floating point operations in arm64 should not disable preempt .
>
> I thought that I would see a problem if I ran a floating point
> workload and cyclictest at the same time but I don't.
>
> Can you please provide me some more details on how I can reproduce the
> problem?
>
> Cheers,
> Anders
>
>> Activating realtime features with below code.
>>
>> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> Date: Tue, 28 Apr 2015 11:53:00 +0530
>> Subject: [PATCH ] floating point realtime fix
>>
>> ---
>>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>>  1 file changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 2438497..3dca156 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>>   */
>>  void fpsimd_preserve_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>>   */
>>  void fpsimd_restore_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>>
>> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>>   */
>>  void fpsimd_update_current_state(struct fpsimd_state *state)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   fpsimd_load_state(state);
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>>   * that there is no longer userland FPSIMD state in the
>>   * registers.
>>   */
>> - preempt_disable();
>> + migrate_disable();
>>   if (current->mm &&
>>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>>   fpsimd_load_partial_state(s);
>>   } else {
>> - preempt_enable();
>> + migrate_enable();
>>   }
>>  }
>>  EXPORT_SYMBOL(kernel_neon_end);
>>
>> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
>> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
>> > one issue with Cyclic test while running floating point operations. So
>> > , modified below changes for that.
>> >
>> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> > Date: Tue, 28 Apr 2015 11:53:00 +0530
>> > Subject: [PATCH ] floating point realtime fix
>> >
>> > ---
>> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >  1 file changed, 8 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> > index 2438497..3dca156 100644
>> > --- a/arch/arm64/kernel/fpsimd.c
>> > +++ b/arch/arm64/kernel/fpsimd.c
>> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >   */
>> >  void fpsimd_preserve_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >   */
>> >  void fpsimd_restore_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >
>> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >   */
>> >  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   fpsimd_load_state(state);
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >   * that there is no longer userland FPSIMD state in the
>> >   * registers.
>> >   */
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (current->mm &&
>> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >   fpsimd_load_partial_state(s);
>> >   } else {
>> > - preempt_enable();
>> > + migrate_enable();
>> >   }
>> >  }
>> >  EXPORT_SYMBOL(kernel_neon_end);
>> > --
>> > 1.9.1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-07 11:09     ` Ayyappa Ch
  0 siblings, 0 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-07 11:09 UTC (permalink / raw)
  To: linux-arm-kernel

./cyclictest -p 80 -t5 -n  => Cyclic test is failing after 6Lack iterations
T: 0 ( 1364) P:99 I:1000 C: 668664 Min:      3 Act:    5 Avg:   11 Max:  100116

After analyzing the trace log , we observed the sudden change in
latency due to calling of fpsimd_preserve_current_state function.

Line 149766:     ntpd-925     1....1.. 512046976us :
fpsimd_preserve_current_state <-do_signal
Line 149768:     ntpd-925     1....1.. 512046977us : preempt_count_add
<-fpsimd_preserve_current_state
Line 149770:     ntpd-925     1....2.. 512046978us : preempt_count_sub
<-fpsimd_preserve_current_state

.......
Line 163170: cyclicte-964     0.....11 512065181us :
tracing_mark_write: hit latency threshold (98383 > 500)


Thanks and regards,
Ayyappa.Ch

...

On Thu, May 7, 2015 at 1:08 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
> On 2015-05-01 20:59, Ayyappa Ch wrote:
>> Floating point operations in arm64 should not disable preempt .
>
> I thought that I would see a problem if I ran a floating point
> workload and cyclictest at the same time but I don't.
>
> Can you please provide me some more details on how I can reproduce the
> problem?
>
> Cheers,
> Anders
>
>> Activating realtime features with below code.
>>
>> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> Date: Tue, 28 Apr 2015 11:53:00 +0530
>> Subject: [PATCH ] floating point realtime fix
>>
>> ---
>>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>>  1 file changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 2438497..3dca156 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>>   */
>>  void fpsimd_preserve_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>>   */
>>  void fpsimd_restore_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>>
>> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>>   */
>>  void fpsimd_update_current_state(struct fpsimd_state *state)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   fpsimd_load_state(state);
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>>   * that there is no longer userland FPSIMD state in the
>>   * registers.
>>   */
>> - preempt_disable();
>> + migrate_disable();
>>   if (current->mm &&
>>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>>   fpsimd_load_partial_state(s);
>>   } else {
>> - preempt_enable();
>> + migrate_enable();
>>   }
>>  }
>>  EXPORT_SYMBOL(kernel_neon_end);
>>
>> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
>> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
>> > one issue with Cyclic test while running floating point operations. So
>> > , modified below changes for that.
>> >
>> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> > Date: Tue, 28 Apr 2015 11:53:00 +0530
>> > Subject: [PATCH ] floating point realtime fix
>> >
>> > ---
>> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >  1 file changed, 8 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> > index 2438497..3dca156 100644
>> > --- a/arch/arm64/kernel/fpsimd.c
>> > +++ b/arch/arm64/kernel/fpsimd.c
>> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >   */
>> >  void fpsimd_preserve_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >   */
>> >  void fpsimd_restore_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >
>> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >   */
>> >  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   fpsimd_load_state(state);
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >   * that there is no longer userland FPSIMD state in the
>> >   * registers.
>> >   */
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (current->mm &&
>> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >   fpsimd_load_partial_state(s);
>> >   } else {
>> > - preempt_enable();
>> > + migrate_enable();
>> >   }
>> >  }
>> >  EXPORT_SYMBOL(kernel_neon_end);
>> > --
>> > 1.9.1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-07 11:09     ` Ayyappa Ch
@ 2015-05-08  0:09       ` Anders Roxell
  -1 siblings, 0 replies; 31+ messages in thread
From: Anders Roxell @ 2015-05-08  0:09 UTC (permalink / raw)
  To: Ayyappa Ch
  Cc: linux-arm-kernel, Sebastian Andrzej Siewior, linux-rt-users,
	Kevin Hilman

On 2015-05-07 16:39, Ayyappa Ch wrote:
> ./cyclictest -p 80 -t5 -n  => Cyclic test is failing after 6Lack iterations
> T: 0 ( 1364) P:99 I:1000 C: 668664 Min:      3 Act:    5 Avg:   11 Max:  100116
> 
> After analyzing the trace log , we observed the sudden change in
> latency due to calling of fpsimd_preserve_current_state function.

I'm still unable to reproduce your problem scenario and from the log
snippet below I'm not convinced that fpsimd_preserve_current_state is
the bad guy.

> 
> Line 149766:     ntpd-925     1....1.. 512046976us :
> fpsimd_preserve_current_state <-do_signal
> Line 149768:     ntpd-925     1....1.. 512046977us : preempt_count_add
> <-fpsimd_preserve_current_state
> Line 149770:     ntpd-925     1....2.. 512046978us : preempt_count_sub
> <-fpsimd_preserve_current_state

fpsimd_preserve_current_state seams to return within 2us

> 
> .......
> Line 163170: cyclicte-964     0.....11 512065181us :

The latency appears to be happening somewhere between the
fpsimd_preserve_current_state return and the resumption of the cyclictest.
Whatever cased the latency may have happened in the omitted portion of
the trace log?
It may not even have been traceable thread of execution, depending on
the trace tool that have been used.
If the part of the trace log that you didn't show doesn't seam to have
any interesting events then maybe the offending code runs outside of
scheduler control or never touches any defined trace points.

Cheers,
Anders

> tracing_mark_write: hit latency threshold (98383 > 500)
> 
> 
> Thanks and regards,
> Ayyappa.Ch
> 
> ...
> 
> On Thu, May 7, 2015 at 1:08 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
> > On 2015-05-01 20:59, Ayyappa Ch wrote:
> >> Floating point operations in arm64 should not disable preempt .
> >
> > I thought that I would see a problem if I ran a floating point
> > workload and cyclictest at the same time but I don't.
> >
> > Can you please provide me some more details on how I can reproduce the
> > problem?
> >
> > Cheers,
> > Anders
> >
> >> Activating realtime features with below code.
> >>
> >> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> >> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> >> Date: Tue, 28 Apr 2015 11:53:00 +0530
> >> Subject: [PATCH ] floating point realtime fix
> >>
> >> ---
> >>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >>  1 file changed, 8 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> >> index 2438497..3dca156 100644
> >> --- a/arch/arm64/kernel/fpsimd.c
> >> +++ b/arch/arm64/kernel/fpsimd.c
> >> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >>   */
> >>  void fpsimd_preserve_current_state(void)
> >>  {
> >> - preempt_disable();
> >> + migrate_disable();
> >>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >>   fpsimd_save_state(&current->thread.fpsimd_state);
> >> - preempt_enable();
> >> + migrate_enable();
> >>  }
> >>
> >>  /*
> >> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >>   */
> >>  void fpsimd_restore_current_state(void)
> >>  {
> >> - preempt_disable();
> >> + migrate_disable();
> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >>
> >> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >>   this_cpu_write(fpsimd_last_state, st);
> >>   st->cpu = smp_processor_id();
> >>   }
> >> - preempt_enable();
> >> + migrate_enable();
> >>  }
> >>
> >>  /*
> >> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >>   */
> >>  void fpsimd_update_current_state(struct fpsimd_state *state)
> >>  {
> >> - preempt_disable();
> >> + migrate_disable();
> >>   fpsimd_load_state(state);
> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >>   this_cpu_write(fpsimd_last_state, st);
> >>   st->cpu = smp_processor_id();
> >>   }
> >> - preempt_enable();
> >> + migrate_enable();
> >>  }
> >>
> >>  /*
> >> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >>   * that there is no longer userland FPSIMD state in the
> >>   * registers.
> >>   */
> >> - preempt_disable();
> >> + migrate_disable();
> >>   if (current->mm &&
> >>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >>   fpsimd_save_state(&current->thread.fpsimd_state);
> >> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >>   fpsimd_load_partial_state(s);
> >>   } else {
> >> - preempt_enable();
> >> + migrate_enable();
> >>   }
> >>  }
> >>  EXPORT_SYMBOL(kernel_neon_end);
> >>
> >> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> >> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> >> > one issue with Cyclic test while running floating point operations. So
> >> > , modified below changes for that.
> >> >
> >> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> >> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> >> > Date: Tue, 28 Apr 2015 11:53:00 +0530
> >> > Subject: [PATCH ] floating point realtime fix
> >> >
> >> > ---
> >> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >> >
> >> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> >> > index 2438497..3dca156 100644
> >> > --- a/arch/arm64/kernel/fpsimd.c
> >> > +++ b/arch/arm64/kernel/fpsimd.c
> >> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >> >   */
> >> >  void fpsimd_preserve_current_state(void)
> >> >  {
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >  }
> >> >
> >> >  /*
> >> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >> >   */
> >> >  void fpsimd_restore_current_state(void)
> >> >  {
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >> >
> >> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >> >   this_cpu_write(fpsimd_last_state, st);
> >> >   st->cpu = smp_processor_id();
> >> >   }
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >  }
> >> >
> >> >  /*
> >> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >> >   */
> >> >  void fpsimd_update_current_state(struct fpsimd_state *state)
> >> >  {
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   fpsimd_load_state(state);
> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >> >   this_cpu_write(fpsimd_last_state, st);
> >> >   st->cpu = smp_processor_id();
> >> >   }
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >  }
> >> >
> >> >  /*
> >> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >> >   * that there is no longer userland FPSIMD state in the
> >> >   * registers.
> >> >   */
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   if (current->mm &&
> >> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
> >> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >> >   fpsimd_load_partial_state(s);
> >> >   } else {
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >   }
> >> >  }
> >> >  EXPORT_SYMBOL(kernel_neon_end);
> >> > --
> >> > 1.9.1

-- 
Anders Roxell
anders.roxell@linaro.org
M: +46 709 71 42 85 | IRC: roxell

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-08  0:09       ` Anders Roxell
  0 siblings, 0 replies; 31+ messages in thread
From: Anders Roxell @ 2015-05-08  0:09 UTC (permalink / raw)
  To: linux-arm-kernel

On 2015-05-07 16:39, Ayyappa Ch wrote:
> ./cyclictest -p 80 -t5 -n  => Cyclic test is failing after 6Lack iterations
> T: 0 ( 1364) P:99 I:1000 C: 668664 Min:      3 Act:    5 Avg:   11 Max:  100116
> 
> After analyzing the trace log , we observed the sudden change in
> latency due to calling of fpsimd_preserve_current_state function.

I'm still unable to reproduce your problem scenario and from the log
snippet below I'm not convinced that fpsimd_preserve_current_state is
the bad guy.

> 
> Line 149766:     ntpd-925     1....1.. 512046976us :
> fpsimd_preserve_current_state <-do_signal
> Line 149768:     ntpd-925     1....1.. 512046977us : preempt_count_add
> <-fpsimd_preserve_current_state
> Line 149770:     ntpd-925     1....2.. 512046978us : preempt_count_sub
> <-fpsimd_preserve_current_state

fpsimd_preserve_current_state seams to return within 2us

> 
> .......
> Line 163170: cyclicte-964     0.....11 512065181us :

The latency appears to be happening somewhere between the
fpsimd_preserve_current_state return and the resumption of the cyclictest.
Whatever cased the latency may have happened in the omitted portion of
the trace log?
It may not even have been traceable thread of execution, depending on
the trace tool that have been used.
If the part of the trace log that you didn't show doesn't seam to have
any interesting events then maybe the offending code runs outside of
scheduler control or never touches any defined trace points.

Cheers,
Anders

> tracing_mark_write: hit latency threshold (98383 > 500)
> 
> 
> Thanks and regards,
> Ayyappa.Ch
> 
> ...
> 
> On Thu, May 7, 2015 at 1:08 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
> > On 2015-05-01 20:59, Ayyappa Ch wrote:
> >> Floating point operations in arm64 should not disable preempt .
> >
> > I thought that I would see a problem if I ran a floating point
> > workload and cyclictest at the same time but I don't.
> >
> > Can you please provide me some more details on how I can reproduce the
> > problem?
> >
> > Cheers,
> > Anders
> >
> >> Activating realtime features with below code.
> >>
> >> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> >> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> >> Date: Tue, 28 Apr 2015 11:53:00 +0530
> >> Subject: [PATCH ] floating point realtime fix
> >>
> >> ---
> >>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >>  1 file changed, 8 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> >> index 2438497..3dca156 100644
> >> --- a/arch/arm64/kernel/fpsimd.c
> >> +++ b/arch/arm64/kernel/fpsimd.c
> >> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >>   */
> >>  void fpsimd_preserve_current_state(void)
> >>  {
> >> - preempt_disable();
> >> + migrate_disable();
> >>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >>   fpsimd_save_state(&current->thread.fpsimd_state);
> >> - preempt_enable();
> >> + migrate_enable();
> >>  }
> >>
> >>  /*
> >> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >>   */
> >>  void fpsimd_restore_current_state(void)
> >>  {
> >> - preempt_disable();
> >> + migrate_disable();
> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >>
> >> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >>   this_cpu_write(fpsimd_last_state, st);
> >>   st->cpu = smp_processor_id();
> >>   }
> >> - preempt_enable();
> >> + migrate_enable();
> >>  }
> >>
> >>  /*
> >> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >>   */
> >>  void fpsimd_update_current_state(struct fpsimd_state *state)
> >>  {
> >> - preempt_disable();
> >> + migrate_disable();
> >>   fpsimd_load_state(state);
> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >>   this_cpu_write(fpsimd_last_state, st);
> >>   st->cpu = smp_processor_id();
> >>   }
> >> - preempt_enable();
> >> + migrate_enable();
> >>  }
> >>
> >>  /*
> >> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >>   * that there is no longer userland FPSIMD state in the
> >>   * registers.
> >>   */
> >> - preempt_disable();
> >> + migrate_disable();
> >>   if (current->mm &&
> >>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >>   fpsimd_save_state(&current->thread.fpsimd_state);
> >> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >>   fpsimd_load_partial_state(s);
> >>   } else {
> >> - preempt_enable();
> >> + migrate_enable();
> >>   }
> >>  }
> >>  EXPORT_SYMBOL(kernel_neon_end);
> >>
> >> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> >> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> >> > one issue with Cyclic test while running floating point operations. So
> >> > , modified below changes for that.
> >> >
> >> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> >> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> >> > Date: Tue, 28 Apr 2015 11:53:00 +0530
> >> > Subject: [PATCH ] floating point realtime fix
> >> >
> >> > ---
> >> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >> >
> >> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> >> > index 2438497..3dca156 100644
> >> > --- a/arch/arm64/kernel/fpsimd.c
> >> > +++ b/arch/arm64/kernel/fpsimd.c
> >> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >> >   */
> >> >  void fpsimd_preserve_current_state(void)
> >> >  {
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >  }
> >> >
> >> >  /*
> >> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >> >   */
> >> >  void fpsimd_restore_current_state(void)
> >> >  {
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >> >
> >> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >> >   this_cpu_write(fpsimd_last_state, st);
> >> >   st->cpu = smp_processor_id();
> >> >   }
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >  }
> >> >
> >> >  /*
> >> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >> >   */
> >> >  void fpsimd_update_current_state(struct fpsimd_state *state)
> >> >  {
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   fpsimd_load_state(state);
> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >> >   this_cpu_write(fpsimd_last_state, st);
> >> >   st->cpu = smp_processor_id();
> >> >   }
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >  }
> >> >
> >> >  /*
> >> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >> >   * that there is no longer userland FPSIMD state in the
> >> >   * registers.
> >> >   */
> >> > - preempt_disable();
> >> > + migrate_disable();
> >> >   if (current->mm &&
> >> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
> >> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >> >   fpsimd_load_partial_state(s);
> >> >   } else {
> >> > - preempt_enable();
> >> > + migrate_enable();
> >> >   }
> >> >  }
> >> >  EXPORT_SYMBOL(kernel_neon_end);
> >> > --
> >> > 1.9.1

-- 
Anders Roxell
anders.roxell at linaro.org
M: +46 709 71 42 85 | IRC: roxell

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-08  0:09       ` Anders Roxell
@ 2015-05-11  5:32         ` Ayyappa Ch
  -1 siblings, 0 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-11  5:32 UTC (permalink / raw)
  To: Anders Roxell
  Cc: linux-arm-kernel, Sebastian Andrzej Siewior, linux-rt-users,
	Kevin Hilman

Hello Anders Roxell,

Many thanks for your inputs.

But as per my observation after this change , Cyclic test did not failed.

As logs are huge size , sent only to you.

Can you please check the logs and let us know your view ?

Thanks and regards,
Ayyappa.Ch

On Fri, May 8, 2015 at 5:39 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
> On 2015-05-07 16:39, Ayyappa Ch wrote:
>> ./cyclictest -p 80 -t5 -n  => Cyclic test is failing after 6Lack iterations
>> T: 0 ( 1364) P:99 I:1000 C: 668664 Min:      3 Act:    5 Avg:   11 Max:  100116
>>
>> After analyzing the trace log , we observed the sudden change in
>> latency due to calling of fpsimd_preserve_current_state function.
>
> I'm still unable to reproduce your problem scenario and from the log
> snippet below I'm not convinced that fpsimd_preserve_current_state is
> the bad guy.
>
>>
>> Line 149766:     ntpd-925     1....1.. 512046976us :
>> fpsimd_preserve_current_state <-do_signal
>> Line 149768:     ntpd-925     1....1.. 512046977us : preempt_count_add
>> <-fpsimd_preserve_current_state
>> Line 149770:     ntpd-925     1....2.. 512046978us : preempt_count_sub
>> <-fpsimd_preserve_current_state
>
> fpsimd_preserve_current_state seams to return within 2us
>
>>
>> .......
>> Line 163170: cyclicte-964     0.....11 512065181us :
>
> The latency appears to be happening somewhere between the
> fpsimd_preserve_current_state return and the resumption of the cyclictest.
> Whatever cased the latency may have happened in the omitted portion of
> the trace log?
> It may not even have been traceable thread of execution, depending on
> the trace tool that have been used.
> If the part of the trace log that you didn't show doesn't seam to have
> any interesting events then maybe the offending code runs outside of
> scheduler control or never touches any defined trace points.
>
> Cheers,
> Anders
>
>> tracing_mark_write: hit latency threshold (98383 > 500)
>>
>>
>> Thanks and regards,
>> Ayyappa.Ch
>>
>> ...
>>
>> On Thu, May 7, 2015 at 1:08 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
>> > On 2015-05-01 20:59, Ayyappa Ch wrote:
>> >> Floating point operations in arm64 should not disable preempt .
>> >
>> > I thought that I would see a problem if I ran a floating point
>> > workload and cyclictest at the same time but I don't.
>> >
>> > Can you please provide me some more details on how I can reproduce the
>> > problem?
>> >
>> > Cheers,
>> > Anders
>> >
>> >> Activating realtime features with below code.
>> >>
>> >> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> >> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> >> Date: Tue, 28 Apr 2015 11:53:00 +0530
>> >> Subject: [PATCH ] floating point realtime fix
>> >>
>> >> ---
>> >>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >>  1 file changed, 8 insertions(+), 8 deletions(-)
>> >>
>> >> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> >> index 2438497..3dca156 100644
>> >> --- a/arch/arm64/kernel/fpsimd.c
>> >> +++ b/arch/arm64/kernel/fpsimd.c
>> >> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >>   */
>> >>  void fpsimd_preserve_current_state(void)
>> >>  {
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >>   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>  }
>> >>
>> >>  /*
>> >> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >>   */
>> >>  void fpsimd_restore_current_state(void)
>> >>  {
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >>
>> >> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >>   this_cpu_write(fpsimd_last_state, st);
>> >>   st->cpu = smp_processor_id();
>> >>   }
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>  }
>> >>
>> >>  /*
>> >> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >>   */
>> >>  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >>  {
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   fpsimd_load_state(state);
>> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >>   this_cpu_write(fpsimd_last_state, st);
>> >>   st->cpu = smp_processor_id();
>> >>   }
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>  }
>> >>
>> >>  /*
>> >> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >>   * that there is no longer userland FPSIMD state in the
>> >>   * registers.
>> >>   */
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   if (current->mm &&
>> >>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >>   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >>   fpsimd_load_partial_state(s);
>> >>   } else {
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>   }
>> >>  }
>> >>  EXPORT_SYMBOL(kernel_neon_end);
>> >>
>> >> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
>> >> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
>> >> > one issue with Cyclic test while running floating point operations. So
>> >> > , modified below changes for that.
>> >> >
>> >> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> >> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> >> > Date: Tue, 28 Apr 2015 11:53:00 +0530
>> >> > Subject: [PATCH ] floating point realtime fix
>> >> >
>> >> > ---
>> >> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >> >  1 file changed, 8 insertions(+), 8 deletions(-)
>> >> >
>> >> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> >> > index 2438497..3dca156 100644
>> >> > --- a/arch/arm64/kernel/fpsimd.c
>> >> > +++ b/arch/arm64/kernel/fpsimd.c
>> >> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >> >   */
>> >> >  void fpsimd_preserve_current_state(void)
>> >> >  {
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >  }
>> >> >
>> >> >  /*
>> >> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >> >   */
>> >> >  void fpsimd_restore_current_state(void)
>> >> >  {
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >> >
>> >> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >> >   this_cpu_write(fpsimd_last_state, st);
>> >> >   st->cpu = smp_processor_id();
>> >> >   }
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >  }
>> >> >
>> >> >  /*
>> >> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >> >   */
>> >> >  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >> >  {
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   fpsimd_load_state(state);
>> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >> >   this_cpu_write(fpsimd_last_state, st);
>> >> >   st->cpu = smp_processor_id();
>> >> >   }
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >  }
>> >> >
>> >> >  /*
>> >> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >> >   * that there is no longer userland FPSIMD state in the
>> >> >   * registers.
>> >> >   */
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   if (current->mm &&
>> >> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >> >   fpsimd_load_partial_state(s);
>> >> >   } else {
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >   }
>> >> >  }
>> >> >  EXPORT_SYMBOL(kernel_neon_end);
>> >> > --
>> >> > 1.9.1
>
> --
> Anders Roxell
> anders.roxell@linaro.org
> M: +46 709 71 42 85 | IRC: roxell

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-11  5:32         ` Ayyappa Ch
  0 siblings, 0 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-11  5:32 UTC (permalink / raw)
  To: linux-arm-kernel

Hello Anders Roxell,

Many thanks for your inputs.

But as per my observation after this change , Cyclic test did not failed.

As logs are huge size , sent only to you.

Can you please check the logs and let us know your view ?

Thanks and regards,
Ayyappa.Ch

On Fri, May 8, 2015 at 5:39 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
> On 2015-05-07 16:39, Ayyappa Ch wrote:
>> ./cyclictest -p 80 -t5 -n  => Cyclic test is failing after 6Lack iterations
>> T: 0 ( 1364) P:99 I:1000 C: 668664 Min:      3 Act:    5 Avg:   11 Max:  100116
>>
>> After analyzing the trace log , we observed the sudden change in
>> latency due to calling of fpsimd_preserve_current_state function.
>
> I'm still unable to reproduce your problem scenario and from the log
> snippet below I'm not convinced that fpsimd_preserve_current_state is
> the bad guy.
>
>>
>> Line 149766:     ntpd-925     1....1.. 512046976us :
>> fpsimd_preserve_current_state <-do_signal
>> Line 149768:     ntpd-925     1....1.. 512046977us : preempt_count_add
>> <-fpsimd_preserve_current_state
>> Line 149770:     ntpd-925     1....2.. 512046978us : preempt_count_sub
>> <-fpsimd_preserve_current_state
>
> fpsimd_preserve_current_state seams to return within 2us
>
>>
>> .......
>> Line 163170: cyclicte-964     0.....11 512065181us :
>
> The latency appears to be happening somewhere between the
> fpsimd_preserve_current_state return and the resumption of the cyclictest.
> Whatever cased the latency may have happened in the omitted portion of
> the trace log?
> It may not even have been traceable thread of execution, depending on
> the trace tool that have been used.
> If the part of the trace log that you didn't show doesn't seam to have
> any interesting events then maybe the offending code runs outside of
> scheduler control or never touches any defined trace points.
>
> Cheers,
> Anders
>
>> tracing_mark_write: hit latency threshold (98383 > 500)
>>
>>
>> Thanks and regards,
>> Ayyappa.Ch
>>
>> ...
>>
>> On Thu, May 7, 2015 at 1:08 AM, Anders Roxell <anders.roxell@linaro.org> wrote:
>> > On 2015-05-01 20:59, Ayyappa Ch wrote:
>> >> Floating point operations in arm64 should not disable preempt .
>> >
>> > I thought that I would see a problem if I ran a floating point
>> > workload and cyclictest at the same time but I don't.
>> >
>> > Can you please provide me some more details on how I can reproduce the
>> > problem?
>> >
>> > Cheers,
>> > Anders
>> >
>> >> Activating realtime features with below code.
>> >>
>> >> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> >> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> >> Date: Tue, 28 Apr 2015 11:53:00 +0530
>> >> Subject: [PATCH ] floating point realtime fix
>> >>
>> >> ---
>> >>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >>  1 file changed, 8 insertions(+), 8 deletions(-)
>> >>
>> >> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> >> index 2438497..3dca156 100644
>> >> --- a/arch/arm64/kernel/fpsimd.c
>> >> +++ b/arch/arm64/kernel/fpsimd.c
>> >> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >>   */
>> >>  void fpsimd_preserve_current_state(void)
>> >>  {
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >>   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>  }
>> >>
>> >>  /*
>> >> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >>   */
>> >>  void fpsimd_restore_current_state(void)
>> >>  {
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >>
>> >> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >>   this_cpu_write(fpsimd_last_state, st);
>> >>   st->cpu = smp_processor_id();
>> >>   }
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>  }
>> >>
>> >>  /*
>> >> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >>   */
>> >>  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >>  {
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   fpsimd_load_state(state);
>> >>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >>   this_cpu_write(fpsimd_last_state, st);
>> >>   st->cpu = smp_processor_id();
>> >>   }
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>  }
>> >>
>> >>  /*
>> >> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >>   * that there is no longer userland FPSIMD state in the
>> >>   * registers.
>> >>   */
>> >> - preempt_disable();
>> >> + migrate_disable();
>> >>   if (current->mm &&
>> >>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >>   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >>   fpsimd_load_partial_state(s);
>> >>   } else {
>> >> - preempt_enable();
>> >> + migrate_enable();
>> >>   }
>> >>  }
>> >>  EXPORT_SYMBOL(kernel_neon_end);
>> >>
>> >> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
>> >> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
>> >> > one issue with Cyclic test while running floating point operations. So
>> >> > , modified below changes for that.
>> >> >
>> >> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> >> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> >> > Date: Tue, 28 Apr 2015 11:53:00 +0530
>> >> > Subject: [PATCH ] floating point realtime fix
>> >> >
>> >> > ---
>> >> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >> >  1 file changed, 8 insertions(+), 8 deletions(-)
>> >> >
>> >> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> >> > index 2438497..3dca156 100644
>> >> > --- a/arch/arm64/kernel/fpsimd.c
>> >> > +++ b/arch/arm64/kernel/fpsimd.c
>> >> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >> >   */
>> >> >  void fpsimd_preserve_current_state(void)
>> >> >  {
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >  }
>> >> >
>> >> >  /*
>> >> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >> >   */
>> >> >  void fpsimd_restore_current_state(void)
>> >> >  {
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >> >
>> >> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >> >   this_cpu_write(fpsimd_last_state, st);
>> >> >   st->cpu = smp_processor_id();
>> >> >   }
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >  }
>> >> >
>> >> >  /*
>> >> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >> >   */
>> >> >  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >> >  {
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   fpsimd_load_state(state);
>> >> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >> >   this_cpu_write(fpsimd_last_state, st);
>> >> >   st->cpu = smp_processor_id();
>> >> >   }
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >  }
>> >> >
>> >> >  /*
>> >> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >> >   * that there is no longer userland FPSIMD state in the
>> >> >   * registers.
>> >> >   */
>> >> > - preempt_disable();
>> >> > + migrate_disable();
>> >> >   if (current->mm &&
>> >> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> >> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >> >   fpsimd_load_partial_state(s);
>> >> >   } else {
>> >> > - preempt_enable();
>> >> > + migrate_enable();
>> >> >   }
>> >> >  }
>> >> >  EXPORT_SYMBOL(kernel_neon_end);
>> >> > --
>> >> > 1.9.1
>
> --
> Anders Roxell
> anders.roxell at linaro.org
> M: +46 709 71 42 85 | IRC: roxell

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-11  5:32         ` Ayyappa Ch
@ 2015-05-14 16:07           ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 31+ messages in thread
From: Sebastian Andrzej Siewior @ 2015-05-14 16:07 UTC (permalink / raw)
  To: Ayyappa Ch; +Cc: Anders Roxell, linux-arm-kernel, linux-rt-users, Kevin Hilman

* Ayyappa Ch | 2015-05-11 11:02:56 [+0530]:

>Hello Anders Roxell,
>
>Many thanks for your inputs.
>
>But as per my observation after this change , Cyclic test did not failed.
>
>As logs are huge size , sent only to you.
>
>Can you please check the logs and let us know your view ?

fpsimd_save_state() is almost the fpsimd_save macro which saves 32
16byte registers. And *this* takes really that long? You max says 100116
which means 100ms.

>Thanks and regards,
>Ayyappa.Ch

Sebastian

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-14 16:07           ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 31+ messages in thread
From: Sebastian Andrzej Siewior @ 2015-05-14 16:07 UTC (permalink / raw)
  To: linux-arm-kernel

* Ayyappa Ch | 2015-05-11 11:02:56 [+0530]:

>Hello Anders Roxell,
>
>Many thanks for your inputs.
>
>But as per my observation after this change , Cyclic test did not failed.
>
>As logs are huge size , sent only to you.
>
>Can you please check the logs and let us know your view ?

fpsimd_save_state() is almost the fpsimd_save macro which saves 32
16byte registers. And *this* takes really that long? You max says 100116
which means 100ms.

>Thanks and regards,
>Ayyappa.Ch

Sebastian

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-14 16:07           ` Sebastian Andrzej Siewior
@ 2015-05-16  6:38             ` Ayyappa Ch
  -1 siblings, 0 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-16  6:38 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Anders Roxell, linux-arm-kernel, linux-rt-users, Kevin Hilman

Yes. Due to preemt_disable and preempt_enable is causing the delay.

As cyclic log is really big , i will be sending it to you separately.

After this fix , cyclic test is completed succesfully.

On Thu, May 14, 2015 at 9:37 PM, Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
> * Ayyappa Ch | 2015-05-11 11:02:56 [+0530]:
>
>>Hello Anders Roxell,
>>
>>Many thanks for your inputs.
>>
>>But as per my observation after this change , Cyclic test did not failed.
>>
>>As logs are huge size , sent only to you.
>>
>>Can you please check the logs and let us know your view ?
>
> fpsimd_save_state() is almost the fpsimd_save macro which saves 32
> 16byte registers. And *this* takes really that long? You max says 100116
> which means 100ms.
>
>>Thanks and regards,
>>Ayyappa.Ch
>
> Sebastian

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-16  6:38             ` Ayyappa Ch
  0 siblings, 0 replies; 31+ messages in thread
From: Ayyappa Ch @ 2015-05-16  6:38 UTC (permalink / raw)
  To: linux-arm-kernel

Yes. Due to preemt_disable and preempt_enable is causing the delay.

As cyclic log is really big , i will be sending it to you separately.

After this fix , cyclic test is completed succesfully.

On Thu, May 14, 2015 at 9:37 PM, Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
> * Ayyappa Ch | 2015-05-11 11:02:56 [+0530]:
>
>>Hello Anders Roxell,
>>
>>Many thanks for your inputs.
>>
>>But as per my observation after this change , Cyclic test did not failed.
>>
>>As logs are huge size , sent only to you.
>>
>>Can you please check the logs and let us know your view ?
>
> fpsimd_save_state() is almost the fpsimd_save macro which saves 32
> 16byte registers. And *this* takes really that long? You max says 100116
> which means 100ms.
>
>>Thanks and regards,
>>Ayyappa.Ch
>
> Sebastian

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
       [not found]             ` <CAF7YWnw08YK7ogAoLTYXusOvtGCxfDPJW0H+8LyWDzNi_CbR=w@mail.gmail.com>
@ 2015-05-18 21:38                 ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 31+ messages in thread
From: Sebastian Andrzej Siewior @ 2015-05-18 21:38 UTC (permalink / raw)
  To: Gary Robertson
  Cc: Ayyappa Ch, Anders Roxell, linux-arm-kernel, RT, Kevin Hilman

* Gary Robertson | 2015-05-18 08:23:16 [-0500]:

>I have been following this thread and was able to obtain a copy of the full
>log from Anders.
>My initial impression based upon the log entries is that the excessive
>latencies did not occur during the fpsimd calls -
>but the actual progress of an individual task is a bit difficult to follow
>through the logs, so in my spare time
>I started writing a parser to sort it into a format easier to follow.  I
>hope to have it completed shortly.
>This parser will sort the log first by CPU and then by thread so the cause
>of the delay will be easier to see.

There is a smaller version of it at 
    https://breakpoint.cc/arm64_simd_trace_cpu.txt

which contains only CPU0 around that "event. Here are a few pieces:
|cyclicte-964     0....1.. 511965877us : __schedule <-schedule
cyclictest goes away

|kworker/-353     0....111 511965906us : rt_spin_unlock <-process_one_work
kworker is now active

|kworker/-353     0....112 511965954us : kernel_neon_begin_partial <-virt_efi_set_time
|kworker/-353     0....112 511965955us : preempt_count_add <-kernel_neon_begin_partial
and kworker invokes virt_efi_set_time which does the neon save thingy.

|kworker/-353     0d...212 511966764us : __handle_domain_irq <-gic_handle_irq
now we have an interrupt comming.

|kworker/-353     0dn.h412 511966793us : task_woken_rt <-ttwu_do_wakeup
it might be the timer for cyclictest wakeup it might not, however we
have the N flag set and kworker has to go.

|kworker/-353     0dn..212 511966806us : rcu_sysidle_enter <-rcu_irq_exit
|kworker/-353     0dn..212 511967108us : __handle_domain_irq <-gic_handle_irq
|kworker/-353     0dn..212 511967109us : irq_enter <-__handle_domain_irq
so we finished handling one irq and we contiunue with the next one? This
goes on and on and on until finally after a while we have this:

|kworker/-353     0dn..212 512064373us : rcu_irq_exit <-irq_exit
|kworker/-353     0dn..212 512064374us : rcu_sysidle_enter <-rcu_irq_exit
|kworker/-353     0.n..212 512065116us : kernel_neon_end <-virt_efi_set_time
|kworker/-353     0.n..212 512065116us : preempt_count_sub <-kernel_neon_end
|kworker/-353     0.n..112 512065117us : __schedule <-preempt_schedule

and this time we were able to return from rcu_irq_exit and continue with
virt_efi_set_time and finally switch the task. So yes, preemption was
disabled during kernel_neon_{being|end} but we also received 81
interrupt ("gic_handle_irq invocation") during that time. Why is that?

>Regards,
>
>Gary Robertson

Sebastian

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-18 21:38                 ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 31+ messages in thread
From: Sebastian Andrzej Siewior @ 2015-05-18 21:38 UTC (permalink / raw)
  To: linux-arm-kernel

* Gary Robertson | 2015-05-18 08:23:16 [-0500]:

>I have been following this thread and was able to obtain a copy of the full
>log from Anders.
>My initial impression based upon the log entries is that the excessive
>latencies did not occur during the fpsimd calls -
>but the actual progress of an individual task is a bit difficult to follow
>through the logs, so in my spare time
>I started writing a parser to sort it into a format easier to follow.  I
>hope to have it completed shortly.
>This parser will sort the log first by CPU and then by thread so the cause
>of the delay will be easier to see.

There is a smaller version of it at 
    https://breakpoint.cc/arm64_simd_trace_cpu.txt

which contains only CPU0 around that "event. Here are a few pieces:
|cyclicte-964     0....1.. 511965877us : __schedule <-schedule
cyclictest goes away

|kworker/-353     0....111 511965906us : rt_spin_unlock <-process_one_work
kworker is now active

|kworker/-353     0....112 511965954us : kernel_neon_begin_partial <-virt_efi_set_time
|kworker/-353     0....112 511965955us : preempt_count_add <-kernel_neon_begin_partial
and kworker invokes virt_efi_set_time which does the neon save thingy.

|kworker/-353     0d...212 511966764us : __handle_domain_irq <-gic_handle_irq
now we have an interrupt comming.

|kworker/-353     0dn.h412 511966793us : task_woken_rt <-ttwu_do_wakeup
it might be the timer for cyclictest wakeup it might not, however we
have the N flag set and kworker has to go.

|kworker/-353     0dn..212 511966806us : rcu_sysidle_enter <-rcu_irq_exit
|kworker/-353     0dn..212 511967108us : __handle_domain_irq <-gic_handle_irq
|kworker/-353     0dn..212 511967109us : irq_enter <-__handle_domain_irq
so we finished handling one irq and we contiunue with the next one? This
goes on and on and on until finally after a while we have this:

|kworker/-353     0dn..212 512064373us : rcu_irq_exit <-irq_exit
|kworker/-353     0dn..212 512064374us : rcu_sysidle_enter <-rcu_irq_exit
|kworker/-353     0.n..212 512065116us : kernel_neon_end <-virt_efi_set_time
|kworker/-353     0.n..212 512065116us : preempt_count_sub <-kernel_neon_end
|kworker/-353     0.n..112 512065117us : __schedule <-preempt_schedule

and this time we were able to return from rcu_irq_exit and continue with
virt_efi_set_time and finally switch the task. So yes, preemption was
disabled during kernel_neon_{being|end} but we also received 81
interrupt ("gic_handle_irq invocation") during that time. Why is that?

>Regards,
>
>Gary Robertson

Sebastian

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-18 21:38                 ` Sebastian Andrzej Siewior
@ 2015-05-19  0:07                   ` Thomas Gleixner
  -1 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2015-05-19  0:07 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Gary Robertson, Ayyappa Ch, Anders Roxell, linux-arm-kernel, RT,
	Kevin Hilman

On Mon, 18 May 2015, Sebastian Andrzej Siewior wrote:

> * Gary Robertson | 2015-05-18 08:23:16 [-0500]:
> 
> >I have been following this thread and was able to obtain a copy of the full
> >log from Anders.
> >My initial impression based upon the log entries is that the excessive
> >latencies did not occur during the fpsimd calls -
> >but the actual progress of an individual task is a bit difficult to follow
> >through the logs, so in my spare time
> >I started writing a parser to sort it into a format easier to follow.  I
> >hope to have it completed shortly.
> >This parser will sort the log first by CPU and then by thread so the cause
> >of the delay will be easier to see.
> 
> There is a smaller version of it at 
>     https://breakpoint.cc/arm64_simd_trace_cpu.txt
> 
> which contains only CPU0 around that "event. Here are a few pieces:
> |cyclicte-964     0....1.. 511965877us : __schedule <-schedule
> cyclictest goes away
> 
> |kworker/-353     0....111 511965906us : rt_spin_unlock <-process_one_work
> kworker is now active
> 
> |kworker/-353     0....112 511965954us : kernel_neon_begin_partial <-virt_efi_set_time
> |kworker/-353     0....112 511965955us : preempt_count_add <-kernel_neon_begin_partial
> and kworker invokes virt_efi_set_time which does the neon save thingy.
> 
> |kworker/-353     0d...212 511966764us : __handle_domain_irq <-gic_handle_irq
> now we have an interrupt comming.
> 
> |kworker/-353     0dn.h412 511966793us : task_woken_rt <-ttwu_do_wakeup
> it might be the timer for cyclictest wakeup it might not, however we
> have the N flag set and kworker has to go.
> 
> |kworker/-353     0dn..212 511966806us : rcu_sysidle_enter <-rcu_irq_exit
> |kworker/-353     0dn..212 511967108us : __handle_domain_irq <-gic_handle_irq
> |kworker/-353     0dn..212 511967109us : irq_enter <-__handle_domain_irq
> so we finished handling one irq and we contiunue with the next one? This
> goes on and on and on until finally after a while we have this:
> 
> |kworker/-353     0dn..212 512064373us : rcu_irq_exit <-irq_exit
> |kworker/-353     0dn..212 512064374us : rcu_sysidle_enter <-rcu_irq_exit
> |kworker/-353     0.n..212 512065116us : kernel_neon_end <-virt_efi_set_time
> |kworker/-353     0.n..212 512065116us : preempt_count_sub <-kernel_neon_end
> |kworker/-353     0.n..112 512065117us : __schedule <-preempt_schedule
> 
> and this time we were able to return from rcu_irq_exit and continue with
> virt_efi_set_time and finally switch the task. So yes, preemption was
> disabled during kernel_neon_{being|end} but we also received 81
> interrupt ("gic_handle_irq invocation") during that time. Why is that?

> |kworker/-353     0dn..212 512064373us : rcu_irq_exit <-irq_exit
> |kworker/-353     0dn..212 512064374us : rcu_sysidle_enter <-rcu_irq_exit

That's called from RCU_NONIDLE(). Go figure ...

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-19  0:07                   ` Thomas Gleixner
  0 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2015-05-19  0:07 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 18 May 2015, Sebastian Andrzej Siewior wrote:

> * Gary Robertson | 2015-05-18 08:23:16 [-0500]:
> 
> >I have been following this thread and was able to obtain a copy of the full
> >log from Anders.
> >My initial impression based upon the log entries is that the excessive
> >latencies did not occur during the fpsimd calls -
> >but the actual progress of an individual task is a bit difficult to follow
> >through the logs, so in my spare time
> >I started writing a parser to sort it into a format easier to follow.  I
> >hope to have it completed shortly.
> >This parser will sort the log first by CPU and then by thread so the cause
> >of the delay will be easier to see.
> 
> There is a smaller version of it at 
>     https://breakpoint.cc/arm64_simd_trace_cpu.txt
> 
> which contains only CPU0 around that "event. Here are a few pieces:
> |cyclicte-964     0....1.. 511965877us : __schedule <-schedule
> cyclictest goes away
> 
> |kworker/-353     0....111 511965906us : rt_spin_unlock <-process_one_work
> kworker is now active
> 
> |kworker/-353     0....112 511965954us : kernel_neon_begin_partial <-virt_efi_set_time
> |kworker/-353     0....112 511965955us : preempt_count_add <-kernel_neon_begin_partial
> and kworker invokes virt_efi_set_time which does the neon save thingy.
> 
> |kworker/-353     0d...212 511966764us : __handle_domain_irq <-gic_handle_irq
> now we have an interrupt comming.
> 
> |kworker/-353     0dn.h412 511966793us : task_woken_rt <-ttwu_do_wakeup
> it might be the timer for cyclictest wakeup it might not, however we
> have the N flag set and kworker has to go.
> 
> |kworker/-353     0dn..212 511966806us : rcu_sysidle_enter <-rcu_irq_exit
> |kworker/-353     0dn..212 511967108us : __handle_domain_irq <-gic_handle_irq
> |kworker/-353     0dn..212 511967109us : irq_enter <-__handle_domain_irq
> so we finished handling one irq and we contiunue with the next one? This
> goes on and on and on until finally after a while we have this:
> 
> |kworker/-353     0dn..212 512064373us : rcu_irq_exit <-irq_exit
> |kworker/-353     0dn..212 512064374us : rcu_sysidle_enter <-rcu_irq_exit
> |kworker/-353     0.n..212 512065116us : kernel_neon_end <-virt_efi_set_time
> |kworker/-353     0.n..212 512065116us : preempt_count_sub <-kernel_neon_end
> |kworker/-353     0.n..112 512065117us : __schedule <-preempt_schedule
> 
> and this time we were able to return from rcu_irq_exit and continue with
> virt_efi_set_time and finally switch the task. So yes, preemption was
> disabled during kernel_neon_{being|end} but we also received 81
> interrupt ("gic_handle_irq invocation") during that time. Why is that?

> |kworker/-353     0dn..212 512064373us : rcu_irq_exit <-irq_exit
> |kworker/-353     0dn..212 512064374us : rcu_sysidle_enter <-rcu_irq_exit

That's called from RCU_NONIDLE(). Go figure ...

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-01 15:29 [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd Ayyappa Ch
@ 2015-05-21 13:50   ` Anders Roxell
  2015-05-21 13:50   ` Anders Roxell
  1 sibling, 0 replies; 31+ messages in thread
From: Anders Roxell @ 2015-05-21 13:50 UTC (permalink / raw)
  To: Ayyappa Ch
  Cc: linux-arm-kernel, Sebastian Andrzej Siewior, linux-rt-users,
	kevin.hilman, ard.biesheuvel, arnd

On 2015-05-01 20:59, Ayyappa Ch wrote:
> Floating point operations in arm64 should not disable preempt .
> Activating realtime features with below code.

I've talked with an engineer who worked on fpsimd and I was told that
replacing preempt_disable with migrate_disable would leave fpsimd open
to corruption.

The kernel won't save the state of the simd registers when it is
preempted so if another task runs on the same CPU and also uses simd, it
clobbers the registers of the first task, and migrate_disable() does not
prevent that.

If we want to use SIMD with preemption enabled, we need to update the
context switch code to do a full SIMD register state save&restore if
necessary. However, this can have a noticeable cost in all task switch
latencies.

Cheers,
Anders

> 
> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> Date: Tue, 28 Apr 2015 11:53:00 +0530
> Subject: [PATCH ] floating point realtime fix
> 
> ---
>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 2438497..3dca156 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>   */
>  void fpsimd_preserve_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>   */
>  void fpsimd_restore_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> 
> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>   */
>  void fpsimd_update_current_state(struct fpsimd_state *state)
>  {
> - preempt_disable();
> + migrate_disable();
>   fpsimd_load_state(state);
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>   * that there is no longer userland FPSIMD state in the
>   * registers.
>   */
> - preempt_disable();
> + migrate_disable();
>   if (current->mm &&
>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>   fpsimd_load_partial_state(s);
>   } else {
> - preempt_enable();
> + migrate_enable();
>   }
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
> 
> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> > one issue with Cyclic test while running floating point operations. So
> > , modified below changes for that.
> >
> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> > Date: Tue, 28 Apr 2015 11:53:00 +0530
> > Subject: [PATCH ] floating point realtime fix
> >
> > ---
> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index 2438497..3dca156 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >   */
> >  void fpsimd_preserve_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >   */
> >  void fpsimd_restore_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >
> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >   */
> >  void fpsimd_update_current_state(struct fpsimd_state *state)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   fpsimd_load_state(state);
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >   * that there is no longer userland FPSIMD state in the
> >   * registers.
> >   */
> > - preempt_disable();
> > + migrate_disable();
> >   if (current->mm &&
> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >   fpsimd_load_partial_state(s);
> >   } else {
> > - preempt_enable();
> > + migrate_enable();
> >   }
> >  }
> >  EXPORT_SYMBOL(kernel_neon_end);
> > --
> > 1.9.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Anders Roxell
0708 22 71 05

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-21 13:50   ` Anders Roxell
  0 siblings, 0 replies; 31+ messages in thread
From: Anders Roxell @ 2015-05-21 13:50 UTC (permalink / raw)
  To: linux-arm-kernel

On 2015-05-01 20:59, Ayyappa Ch wrote:
> Floating point operations in arm64 should not disable preempt .
> Activating realtime features with below code.

I've talked with an engineer who worked on fpsimd and I was told that
replacing preempt_disable with migrate_disable would leave fpsimd open
to corruption.

The kernel won't save the state of the simd registers when it is
preempted so if another task runs on the same CPU and also uses simd, it
clobbers the registers of the first task, and migrate_disable() does not
prevent that.

If we want to use SIMD with preemption enabled, we need to update the
context switch code to do a full SIMD register state save&restore if
necessary. However, this can have a noticeable cost in all task switch
latencies.

Cheers,
Anders

> 
> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> Date: Tue, 28 Apr 2015 11:53:00 +0530
> Subject: [PATCH ] floating point realtime fix
> 
> ---
>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 2438497..3dca156 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>   */
>  void fpsimd_preserve_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>   */
>  void fpsimd_restore_current_state(void)
>  {
> - preempt_disable();
> + migrate_disable();
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> 
> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>   */
>  void fpsimd_update_current_state(struct fpsimd_state *state)
>  {
> - preempt_disable();
> + migrate_disable();
>   fpsimd_load_state(state);
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   struct fpsimd_state *st = &current->thread.fpsimd_state;
> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>   this_cpu_write(fpsimd_last_state, st);
>   st->cpu = smp_processor_id();
>   }
> - preempt_enable();
> + migrate_enable();
>  }
> 
>  /*
> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>   * that there is no longer userland FPSIMD state in the
>   * registers.
>   */
> - preempt_disable();
> + migrate_disable();
>   if (current->mm &&
>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>   fpsimd_save_state(&current->thread.fpsimd_state);
> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>   fpsimd_load_partial_state(s);
>   } else {
> - preempt_enable();
> + migrate_enable();
>   }
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
> 
> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
> > one issue with Cyclic test while running floating point operations. So
> > , modified below changes for that.
> >
> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
> > Date: Tue, 28 Apr 2015 11:53:00 +0530
> > Subject: [PATCH ] floating point realtime fix
> >
> > ---
> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index 2438497..3dca156 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
> >   */
> >  void fpsimd_preserve_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
> >   */
> >  void fpsimd_restore_current_state(void)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> >
> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
> >   */
> >  void fpsimd_update_current_state(struct fpsimd_state *state)
> >  {
> > - preempt_disable();
> > + migrate_disable();
> >   fpsimd_load_state(state);
> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
> >   this_cpu_write(fpsimd_last_state, st);
> >   st->cpu = smp_processor_id();
> >   }
> > - preempt_enable();
> > + migrate_enable();
> >  }
> >
> >  /*
> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
> >   * that there is no longer userland FPSIMD state in the
> >   * registers.
> >   */
> > - preempt_disable();
> > + migrate_disable();
> >   if (current->mm &&
> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> >   fpsimd_save_state(&current->thread.fpsimd_state);
> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> >   fpsimd_load_partial_state(s);
> >   } else {
> > - preempt_enable();
> > + migrate_enable();
> >   }
> >  }
> >  EXPORT_SYMBOL(kernel_neon_end);
> > --
> > 1.9.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Anders Roxell
0708 22 71 05

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-21 13:50   ` Anders Roxell
@ 2015-05-21 15:23     ` Ard Biesheuvel
  -1 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2015-05-21 15:23 UTC (permalink / raw)
  To: Anders Roxell
  Cc: Ayyappa Ch, linux-arm-kernel, Sebastian Andrzej Siewior,
	linux-rt-users, Kevin Hilman, arnd

On 21 May 2015 at 15:50, Anders Roxell <anders.roxell@gmail.com> wrote:
> On 2015-05-01 20:59, Ayyappa Ch wrote:
>> Floating point operations in arm64 should not disable preempt .
>> Activating realtime features with below code.
>
> I've talked with an engineer who worked on fpsimd and I was told that
> replacing preempt_disable with migrate_disable would leave fpsimd open
> to corruption.
>
> The kernel won't save the state of the simd registers when it is
> preempted so if another task runs on the same CPU and also uses simd, it
> clobbers the registers of the first task, and migrate_disable() does not
> prevent that.
>
> If we want to use SIMD with preemption enabled, we need to update the
> context switch code to do a full SIMD register state save&restore if
> necessary. However, this can have a noticeable cost in all task switch
> latencies.
>

I noticed somewhere in this thread that the culprit was ultimately a
call to virt_efi_set_time(), which is the UEFI Runtime Service that
programs the RTC. If this is a hot spot, then there is something very
wrong with the system which is entirely unrelated to preempt_rt.

But let's assume this is a valid UEFI Runtime Services call: since
UEFI Runtime Services are allowed to use the FP/SIMD register file, we
need the kernel_neon_begin()/kernel_neon_end() pair even though it is
highly unlikely that such a runtime service call would actually need
to use the NEON or floating point. It is simply imposed by the
kernel<->firmware ABI. Also, on this particular code path, preemption
will be disabled regardless, since the UEFI Runtime Services are
invoked with a UEFI specific TTBR0 mapping, which rules out preemption
for reasons unrelated to the FP/SIMD register file.

-- 
Ard.


>>
>> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> Date: Tue, 28 Apr 2015 11:53:00 +0530
>> Subject: [PATCH ] floating point realtime fix
>>
>> ---
>>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>>  1 file changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 2438497..3dca156 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>>   */
>>  void fpsimd_preserve_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>>   */
>>  void fpsimd_restore_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>>
>> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>>   */
>>  void fpsimd_update_current_state(struct fpsimd_state *state)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   fpsimd_load_state(state);
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>>   * that there is no longer userland FPSIMD state in the
>>   * registers.
>>   */
>> - preempt_disable();
>> + migrate_disable();
>>   if (current->mm &&
>>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>>   fpsimd_load_partial_state(s);
>>   } else {
>> - preempt_enable();
>> + migrate_enable();
>>   }
>>  }
>>  EXPORT_SYMBOL(kernel_neon_end);
>>
>> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
>> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
>> > one issue with Cyclic test while running floating point operations. So
>> > , modified below changes for that.
>> >
>> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> > Date: Tue, 28 Apr 2015 11:53:00 +0530
>> > Subject: [PATCH ] floating point realtime fix
>> >
>> > ---
>> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >  1 file changed, 8 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> > index 2438497..3dca156 100644
>> > --- a/arch/arm64/kernel/fpsimd.c
>> > +++ b/arch/arm64/kernel/fpsimd.c
>> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >   */
>> >  void fpsimd_preserve_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >   */
>> >  void fpsimd_restore_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >
>> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >   */
>> >  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   fpsimd_load_state(state);
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >   * that there is no longer userland FPSIMD state in the
>> >   * registers.
>> >   */
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (current->mm &&
>> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >   fpsimd_load_partial_state(s);
>> >   } else {
>> > - preempt_enable();
>> > + migrate_enable();
>> >   }
>> >  }
>> >  EXPORT_SYMBOL(kernel_neon_end);
>> > --
>> > 1.9.1
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> --
> Anders Roxell
> 0708 22 71 05

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-21 15:23     ` Ard Biesheuvel
  0 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2015-05-21 15:23 UTC (permalink / raw)
  To: linux-arm-kernel

On 21 May 2015 at 15:50, Anders Roxell <anders.roxell@gmail.com> wrote:
> On 2015-05-01 20:59, Ayyappa Ch wrote:
>> Floating point operations in arm64 should not disable preempt .
>> Activating realtime features with below code.
>
> I've talked with an engineer who worked on fpsimd and I was told that
> replacing preempt_disable with migrate_disable would leave fpsimd open
> to corruption.
>
> The kernel won't save the state of the simd registers when it is
> preempted so if another task runs on the same CPU and also uses simd, it
> clobbers the registers of the first task, and migrate_disable() does not
> prevent that.
>
> If we want to use SIMD with preemption enabled, we need to update the
> context switch code to do a full SIMD register state save&restore if
> necessary. However, this can have a noticeable cost in all task switch
> latencies.
>

I noticed somewhere in this thread that the culprit was ultimately a
call to virt_efi_set_time(), which is the UEFI Runtime Service that
programs the RTC. If this is a hot spot, then there is something very
wrong with the system which is entirely unrelated to preempt_rt.

But let's assume this is a valid UEFI Runtime Services call: since
UEFI Runtime Services are allowed to use the FP/SIMD register file, we
need the kernel_neon_begin()/kernel_neon_end() pair even though it is
highly unlikely that such a runtime service call would actually need
to use the NEON or floating point. It is simply imposed by the
kernel<->firmware ABI. Also, on this particular code path, preemption
will be disabled regardless, since the UEFI Runtime Services are
invoked with a UEFI specific TTBR0 mapping, which rules out preemption
for reasons unrelated to the FP/SIMD register file.

-- 
Ard.


>>
>> From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> Date: Tue, 28 Apr 2015 11:53:00 +0530
>> Subject: [PATCH ] floating point realtime fix
>>
>> ---
>>  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>>  1 file changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 2438497..3dca156 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>>   */
>>  void fpsimd_preserve_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>>   */
>>  void fpsimd_restore_current_state(void)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>>
>> @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>>   */
>>  void fpsimd_update_current_state(struct fpsimd_state *state)
>>  {
>> - preempt_disable();
>> + migrate_disable();
>>   fpsimd_load_state(state);
>>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>>   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>>   this_cpu_write(fpsimd_last_state, st);
>>   st->cpu = smp_processor_id();
>>   }
>> - preempt_enable();
>> + migrate_enable();
>>  }
>>
>>  /*
>> @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>>   * that there is no longer userland FPSIMD state in the
>>   * registers.
>>   */
>> - preempt_disable();
>> + migrate_disable();
>>   if (current->mm &&
>>      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>>   fpsimd_save_state(&current->thread.fpsimd_state);
>> @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>>   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>>   fpsimd_load_partial_state(s);
>>   } else {
>> - preempt_enable();
>> + migrate_enable();
>>   }
>>  }
>>  EXPORT_SYMBOL(kernel_neon_end);
>>
>> On Fri, May 1, 2015 at 1:03 PM, Ayyappa Ch <ayyappa.ch.linux@gmail.com> wrote:
>> > Following Anders Roxell  patch on "Enable PREEMPT_RT_FULL" , observed
>> > one issue with Cyclic test while running floating point operations. So
>> > , modified below changes for that.
>> >
>> > From e6a5fce9b3b55f48656240036a9354a0997c2907 Mon Sep 17 00:00:00 2001
>> > From: Ayyappa Ch <ayyappa.chandolu@amd.com>
>> > Date: Tue, 28 Apr 2015 11:53:00 +0530
>> > Subject: [PATCH ] floating point realtime fix
>> >
>> > ---
>> >  arch/arm64/kernel/fpsimd.c | 16 ++++++++--------
>> >  1 file changed, 8 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> > index 2438497..3dca156 100644
>> > --- a/arch/arm64/kernel/fpsimd.c
>> > +++ b/arch/arm64/kernel/fpsimd.c
>> > @@ -166,10 +166,10 @@ void fpsimd_flush_thread(void)
>> >   */
>> >  void fpsimd_preserve_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -179,7 +179,7 @@ void fpsimd_preserve_current_state(void)
>> >   */
>> >  void fpsimd_restore_current_state(void)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> >
>> > @@ -187,7 +187,7 @@ void fpsimd_restore_current_state(void)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -197,7 +197,7 @@ void fpsimd_restore_current_state(void)
>> >   */
>> >  void fpsimd_update_current_state(struct fpsimd_state *state)
>> >  {
>> > - preempt_disable();
>> > + migrate_disable();
>> >   fpsimd_load_state(state);
>> >   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> >   struct fpsimd_state *st = &current->thread.fpsimd_state;
>> > @@ -205,7 +205,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
>> >   this_cpu_write(fpsimd_last_state, st);
>> >   st->cpu = smp_processor_id();
>> >   }
>> > - preempt_enable();
>> > + migrate_enable();
>> >  }
>> >
>> >  /*
>> > @@ -239,7 +239,7 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >   * that there is no longer userland FPSIMD state in the
>> >   * registers.
>> >   */
>> > - preempt_disable();
>> > + migrate_disable();
>> >   if (current->mm &&
>> >      !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >   fpsimd_save_state(&current->thread.fpsimd_state);
>> > @@ -255,7 +255,7 @@ void kernel_neon_end(void)
>> >   in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >   fpsimd_load_partial_state(s);
>> >   } else {
>> > - preempt_enable();
>> > + migrate_enable();
>> >   }
>> >  }
>> >  EXPORT_SYMBOL(kernel_neon_end);
>> > --
>> > 1.9.1
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
>> the body of a message to majordomo at vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> --
> Anders Roxell
> 0708 22 71 05

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-21 15:23     ` Ard Biesheuvel
@ 2015-05-21 15:35       ` Arnd Bergmann
  -1 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2015-05-21 15:35 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Anders Roxell, Ayyappa Ch, linux-arm-kernel,
	Sebastian Andrzej Siewior, linux-rt-users, Kevin Hilman

On Thursday 21 May 2015 17:23:43 Ard Biesheuvel wrote:
> On 21 May 2015 at 15:50, Anders Roxell <anders.roxell@gmail.com> wrote:
> > On 2015-05-01 20:59, Ayyappa Ch wrote:
> >> Floating point operations in arm64 should not disable preempt .
> >> Activating realtime features with below code.
> >
> > I've talked with an engineer who worked on fpsimd and I was told that
> > replacing preempt_disable with migrate_disable would leave fpsimd open
> > to corruption.
> >
> > The kernel won't save the state of the simd registers when it is
> > preempted so if another task runs on the same CPU and also uses simd, it
> > clobbers the registers of the first task, and migrate_disable() does not
> > prevent that.
> >
> > If we want to use SIMD with preemption enabled, we need to update the
> > context switch code to do a full SIMD register state save&restore if
> > necessary. However, this can have a noticeable cost in all task switch
> > latencies.
> >
> 
> I noticed somewhere in this thread that the culprit was ultimately a
> call to virt_efi_set_time(), which is the UEFI Runtime Service that
> programs the RTC. If this is a hot spot, then there is something very
> wrong with the system which is entirely unrelated to preempt_rt.

Ah, that explains a lot!

> But let's assume this is a valid UEFI Runtime Services call: since
> UEFI Runtime Services are allowed to use the FP/SIMD register file, we
> need the kernel_neon_begin()/kernel_neon_end() pair even though it is
> highly unlikely that such a runtime service call would actually need
> to use the NEON or floating point. It is simply imposed by the
> kernel<->firmware ABI. Also, on this particular code path, preemption
> will be disabled regardless, since the UEFI Runtime Services are
> invoked with a UEFI specific TTBR0 mapping, which rules out preemption
> for reasons unrelated to the FP/SIMD register file.

Can we disable support for UEFI runtime services with preempt-rt
kernels? A 'depends on !PREEMPT_RT' would seem sufficient there.

	Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-21 15:35       ` Arnd Bergmann
  0 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2015-05-21 15:35 UTC (permalink / raw)
  To: linux-arm-kernel

On Thursday 21 May 2015 17:23:43 Ard Biesheuvel wrote:
> On 21 May 2015 at 15:50, Anders Roxell <anders.roxell@gmail.com> wrote:
> > On 2015-05-01 20:59, Ayyappa Ch wrote:
> >> Floating point operations in arm64 should not disable preempt .
> >> Activating realtime features with below code.
> >
> > I've talked with an engineer who worked on fpsimd and I was told that
> > replacing preempt_disable with migrate_disable would leave fpsimd open
> > to corruption.
> >
> > The kernel won't save the state of the simd registers when it is
> > preempted so if another task runs on the same CPU and also uses simd, it
> > clobbers the registers of the first task, and migrate_disable() does not
> > prevent that.
> >
> > If we want to use SIMD with preemption enabled, we need to update the
> > context switch code to do a full SIMD register state save&restore if
> > necessary. However, this can have a noticeable cost in all task switch
> > latencies.
> >
> 
> I noticed somewhere in this thread that the culprit was ultimately a
> call to virt_efi_set_time(), which is the UEFI Runtime Service that
> programs the RTC. If this is a hot spot, then there is something very
> wrong with the system which is entirely unrelated to preempt_rt.

Ah, that explains a lot!

> But let's assume this is a valid UEFI Runtime Services call: since
> UEFI Runtime Services are allowed to use the FP/SIMD register file, we
> need the kernel_neon_begin()/kernel_neon_end() pair even though it is
> highly unlikely that such a runtime service call would actually need
> to use the NEON or floating point. It is simply imposed by the
> kernel<->firmware ABI. Also, on this particular code path, preemption
> will be disabled regardless, since the UEFI Runtime Services are
> invoked with a UEFI specific TTBR0 mapping, which rules out preemption
> for reasons unrelated to the FP/SIMD register file.

Can we disable support for UEFI runtime services with preempt-rt
kernels? A 'depends on !PREEMPT_RT' would seem sufficient there.

	Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-21 15:35       ` Arnd Bergmann
@ 2015-05-21 16:01         ` Ard Biesheuvel
  -1 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2015-05-21 16:01 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Anders Roxell, Ayyappa Ch, linux-arm-kernel,
	Sebastian Andrzej Siewior, linux-rt-users, Kevin Hilman

On 21 May 2015 at 17:35, Arnd Bergmann <arnd@linaro.org> wrote:
> On Thursday 21 May 2015 17:23:43 Ard Biesheuvel wrote:
>> On 21 May 2015 at 15:50, Anders Roxell <anders.roxell@gmail.com> wrote:
>> > On 2015-05-01 20:59, Ayyappa Ch wrote:
>> >> Floating point operations in arm64 should not disable preempt .
>> >> Activating realtime features with below code.
>> >
>> > I've talked with an engineer who worked on fpsimd and I was told that
>> > replacing preempt_disable with migrate_disable would leave fpsimd open
>> > to corruption.
>> >
>> > The kernel won't save the state of the simd registers when it is
>> > preempted so if another task runs on the same CPU and also uses simd, it
>> > clobbers the registers of the first task, and migrate_disable() does not
>> > prevent that.
>> >
>> > If we want to use SIMD with preemption enabled, we need to update the
>> > context switch code to do a full SIMD register state save&restore if
>> > necessary. However, this can have a noticeable cost in all task switch
>> > latencies.
>> >
>>
>> I noticed somewhere in this thread that the culprit was ultimately a
>> call to virt_efi_set_time(), which is the UEFI Runtime Service that
>> programs the RTC. If this is a hot spot, then there is something very
>> wrong with the system which is entirely unrelated to preempt_rt.
>
> Ah, that explains a lot!
>
>> But let's assume this is a valid UEFI Runtime Services call: since
>> UEFI Runtime Services are allowed to use the FP/SIMD register file, we
>> need the kernel_neon_begin()/kernel_neon_end() pair even though it is
>> highly unlikely that such a runtime service call would actually need
>> to use the NEON or floating point. It is simply imposed by the
>> kernel<->firmware ABI. Also, on this particular code path, preemption
>> will be disabled regardless, since the UEFI Runtime Services are
>> invoked with a UEFI specific TTBR0 mapping, which rules out preemption
>> for reasons unrelated to the FP/SIMD register file.
>
> Can we disable support for UEFI runtime services with preempt-rt
> kernels? A 'depends on !PREEMPT_RT' would seem sufficient there.
>

You could but I wouldn't recommend it since it may also prevent you
from being able to set the boot path, but more importantly, reset and
poweroff may also be available only via UEFI Runtime Services on UEFI
systems.

So could someone comment on whether virt_efi_set_time() is present in
all the problematic traces? Or was it only chosen because it
illustrates the underlying problem the best? In the former case, there
is an hidden bug that I would like to know about: however, if some
time related facility that is used in a performance (or latency)
sensitive context ultimately ends up programming the wall clock time
in the RTC, then I would expect the same issue to occur on non-UEFI
systems as well.

If virt_efi_set_time() is merely a false positive (i.e., all
invocations are justifiable), then we are dealing with a general
latency issue caused by saving/restoring the FP/SIMD register file.
Unfortunately, there's really no way around it, since the FP/SIMD
registers are only preserved/restored on a task switch (as Anders has
also pointed out).

One thing I should point out is that this FP/SIMD save/restore is
implemented differently depending on whether it is called from process
context or from hardirq/softirq context. In the former case,
kernel_neon_begin() preserves the userland FP/SIMD context only once,
and only restores it right before returning to userland. This way,
only the first kernel_neon_begin() and the last kernel_neon_end() call
actually induce this latency, and so the average latency could be
quite a bit lower than the worst case (although I understand that few
people may care about the average in an RT context)

In non-process context, the stack/unstack is done on every call to
kernel_neon_begin/end, alyhough in that case, the
kernel_neon_begin_partial() that I implemented specifically for this
case may be used to only preserve a subset of the register file. For
example, AES-CCM uses 6 registers, and the core AES transform only 4.
Currently, this is ignored by the ordinary process context
stack/unstack routines, since the cost is amortized over more
invocations, but for the RT world, I could imagine how having a lower
latency stack/unstack also in process context could be useful.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-21 16:01         ` Ard Biesheuvel
  0 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2015-05-21 16:01 UTC (permalink / raw)
  To: linux-arm-kernel

On 21 May 2015 at 17:35, Arnd Bergmann <arnd@linaro.org> wrote:
> On Thursday 21 May 2015 17:23:43 Ard Biesheuvel wrote:
>> On 21 May 2015 at 15:50, Anders Roxell <anders.roxell@gmail.com> wrote:
>> > On 2015-05-01 20:59, Ayyappa Ch wrote:
>> >> Floating point operations in arm64 should not disable preempt .
>> >> Activating realtime features with below code.
>> >
>> > I've talked with an engineer who worked on fpsimd and I was told that
>> > replacing preempt_disable with migrate_disable would leave fpsimd open
>> > to corruption.
>> >
>> > The kernel won't save the state of the simd registers when it is
>> > preempted so if another task runs on the same CPU and also uses simd, it
>> > clobbers the registers of the first task, and migrate_disable() does not
>> > prevent that.
>> >
>> > If we want to use SIMD with preemption enabled, we need to update the
>> > context switch code to do a full SIMD register state save&restore if
>> > necessary. However, this can have a noticeable cost in all task switch
>> > latencies.
>> >
>>
>> I noticed somewhere in this thread that the culprit was ultimately a
>> call to virt_efi_set_time(), which is the UEFI Runtime Service that
>> programs the RTC. If this is a hot spot, then there is something very
>> wrong with the system which is entirely unrelated to preempt_rt.
>
> Ah, that explains a lot!
>
>> But let's assume this is a valid UEFI Runtime Services call: since
>> UEFI Runtime Services are allowed to use the FP/SIMD register file, we
>> need the kernel_neon_begin()/kernel_neon_end() pair even though it is
>> highly unlikely that such a runtime service call would actually need
>> to use the NEON or floating point. It is simply imposed by the
>> kernel<->firmware ABI. Also, on this particular code path, preemption
>> will be disabled regardless, since the UEFI Runtime Services are
>> invoked with a UEFI specific TTBR0 mapping, which rules out preemption
>> for reasons unrelated to the FP/SIMD register file.
>
> Can we disable support for UEFI runtime services with preempt-rt
> kernels? A 'depends on !PREEMPT_RT' would seem sufficient there.
>

You could but I wouldn't recommend it since it may also prevent you
from being able to set the boot path, but more importantly, reset and
poweroff may also be available only via UEFI Runtime Services on UEFI
systems.

So could someone comment on whether virt_efi_set_time() is present in
all the problematic traces? Or was it only chosen because it
illustrates the underlying problem the best? In the former case, there
is an hidden bug that I would like to know about: however, if some
time related facility that is used in a performance (or latency)
sensitive context ultimately ends up programming the wall clock time
in the RTC, then I would expect the same issue to occur on non-UEFI
systems as well.

If virt_efi_set_time() is merely a false positive (i.e., all
invocations are justifiable), then we are dealing with a general
latency issue caused by saving/restoring the FP/SIMD register file.
Unfortunately, there's really no way around it, since the FP/SIMD
registers are only preserved/restored on a task switch (as Anders has
also pointed out).

One thing I should point out is that this FP/SIMD save/restore is
implemented differently depending on whether it is called from process
context or from hardirq/softirq context. In the former case,
kernel_neon_begin() preserves the userland FP/SIMD context only once,
and only restores it right before returning to userland. This way,
only the first kernel_neon_begin() and the last kernel_neon_end() call
actually induce this latency, and so the average latency could be
quite a bit lower than the worst case (although I understand that few
people may care about the average in an RT context)

In non-process context, the stack/unstack is done on every call to
kernel_neon_begin/end, alyhough in that case, the
kernel_neon_begin_partial() that I implemented specifically for this
case may be used to only preserve a subset of the register file. For
example, AES-CCM uses 6 registers, and the core AES transform only 4.
Currently, this is ignored by the ordinary process context
stack/unstack routines, since the cost is amortized over more
invocations, but for the RT world, I could imagine how having a lower
latency stack/unstack also in process context could be useful.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-21 16:01         ` Ard Biesheuvel
@ 2015-05-22  9:46           ` Arnd Bergmann
  -1 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2015-05-22  9:46 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Anders Roxell, Ayyappa Ch, linux-arm-kernel,
	Sebastian Andrzej Siewior, linux-rt-users, Kevin Hilman

On Thursday 21 May 2015 18:01:27 Ard Biesheuvel wrote:
> 
> You could but I wouldn't recommend it since it may also prevent you
> from being able to set the boot path, but more importantly, reset and
> poweroff may also be available only via UEFI Runtime Services on UEFI
> systems.

Right, makes sense. Another option then could be to disable fpsimd
support with preempt-rt on real systems, and document this as a known
source of latency.

> So could someone comment on whether virt_efi_set_time() is present in
> all the problematic traces? Or was it only chosen because it
> illustrates the underlying problem the best? In the former case, there
> is an hidden bug that I would like to know about: however, if some
> time related facility that is used in a performance (or latency)
> sensitive context ultimately ends up programming the wall clock time
> in the RTC, then I would expect the same issue to occur on non-UEFI
> systems as well.

But without UEFI, updating the RTC would cause much less latency,
because  you don't need to save/restore the fpsimd context, disable
preemption, or call into a potentially unknown external binary
blob, the only latency you'd get there is that of a readl/writel
accessing the RTC register.

> One thing I should point out is that this FP/SIMD save/restore is
> implemented differently depending on whether it is called from process
> context or from hardirq/softirq context. In the former case,
> kernel_neon_begin() preserves the userland FP/SIMD context only once,
> and only restores it right before returning to userland. This way,
> only the first kernel_neon_begin() and the last kernel_neon_end() call
> actually induce this latency, and so the average latency could be
> quite a bit lower than the worst case (although I understand that few
> people may care about the average in an RT context)

Just for my own interest: in what case do we save/restore the fpsimd
state from interrupt context?

	Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-22  9:46           ` Arnd Bergmann
  0 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2015-05-22  9:46 UTC (permalink / raw)
  To: linux-arm-kernel

On Thursday 21 May 2015 18:01:27 Ard Biesheuvel wrote:
> 
> You could but I wouldn't recommend it since it may also prevent you
> from being able to set the boot path, but more importantly, reset and
> poweroff may also be available only via UEFI Runtime Services on UEFI
> systems.

Right, makes sense. Another option then could be to disable fpsimd
support with preempt-rt on real systems, and document this as a known
source of latency.

> So could someone comment on whether virt_efi_set_time() is present in
> all the problematic traces? Or was it only chosen because it
> illustrates the underlying problem the best? In the former case, there
> is an hidden bug that I would like to know about: however, if some
> time related facility that is used in a performance (or latency)
> sensitive context ultimately ends up programming the wall clock time
> in the RTC, then I would expect the same issue to occur on non-UEFI
> systems as well.

But without UEFI, updating the RTC would cause much less latency,
because  you don't need to save/restore the fpsimd context, disable
preemption, or call into a potentially unknown external binary
blob, the only latency you'd get there is that of a readl/writel
accessing the RTC register.

> One thing I should point out is that this FP/SIMD save/restore is
> implemented differently depending on whether it is called from process
> context or from hardirq/softirq context. In the former case,
> kernel_neon_begin() preserves the userland FP/SIMD context only once,
> and only restores it right before returning to userland. This way,
> only the first kernel_neon_begin() and the last kernel_neon_end() call
> actually induce this latency, and so the average latency could be
> quite a bit lower than the worst case (although I understand that few
> people may care about the average in an RT context)

Just for my own interest: in what case do we save/restore the fpsimd
state from interrupt context?

	Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-22  9:46           ` Arnd Bergmann
@ 2015-05-22 10:04             ` Ard Biesheuvel
  -1 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2015-05-22 10:04 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Anders Roxell, Ayyappa Ch, linux-arm-kernel,
	Sebastian Andrzej Siewior, linux-rt-users, Kevin Hilman

On 22 May 2015 at 11:46, Arnd Bergmann <arnd@linaro.org> wrote:
> On Thursday 21 May 2015 18:01:27 Ard Biesheuvel wrote:
>>
>> You could but I wouldn't recommend it since it may also prevent you
>> from being able to set the boot path, but more importantly, reset and
>> poweroff may also be available only via UEFI Runtime Services on UEFI
>> systems.
>
> Right, makes sense. Another option then could be to disable fpsimd
> support with preempt-rt on real systems, and document this as a known
> source of latency.
>

Unfortunately, that could result in corruption of userland FP/SIMD
context, since the UEFI Runtime Services are allowed to use those
registers, and only need to adhere to the normal AAPCS rules that
stipulate that q8..q15 are callee-save. That would still result in a
25% latency reduction if we only need to preserve q0..q7 and q16..q31



>> So could someone comment on whether virt_efi_set_time() is present in
>> all the problematic traces? Or was it only chosen because it
>> illustrates the underlying problem the best? In the former case, there
>> is an hidden bug that I would like to know about: however, if some
>> time related facility that is used in a performance (or latency)
>> sensitive context ultimately ends up programming the wall clock time
>> in the RTC, then I would expect the same issue to occur on non-UEFI
>> systems as well.
>
> But without UEFI, updating the RTC would cause much less latency,
> because  you don't need to save/restore the fpsimd context, disable
> preemption, or call into a potentially unknown external binary
> blob, the only latency you'd get there is that of a readl/writel
> accessing the RTC register.
>

Yes, that is right. So the UEFI Runtime Service interface is
disproportionately heavy. But that still doesn't explain why it would
make sense to sync the RTC with the system clock often enough that it
violates maximum latency limits, since normally, you read it on boot
and set it on reset/poweroff.

>> One thing I should point out is that this FP/SIMD save/restore is
>> implemented differently depending on whether it is called from process
>> context or from hardirq/softirq context. In the former case,
>> kernel_neon_begin() preserves the userland FP/SIMD context only once,
>> and only restores it right before returning to userland. This way,
>> only the first kernel_neon_begin() and the last kernel_neon_end() call
>> actually induce this latency, and so the average latency could be
>> quite a bit lower than the worst case (although I understand that few
>> people may care about the average in an RT context)
>
> Just for my own interest: in what case do we save/restore the fpsimd
> state from interrupt context?
>

For instance, the IEEE802.11 crypto runs in softirq context, but
typically performs a non-trivial amount of crypto work (unless the
hardware takes care of it). Since the accelerated AES-CCM module is
20x faster than C code, it makes sense to stack/unstack 6 NEON
registers and run it on the NEON.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-22 10:04             ` Ard Biesheuvel
  0 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2015-05-22 10:04 UTC (permalink / raw)
  To: linux-arm-kernel

On 22 May 2015 at 11:46, Arnd Bergmann <arnd@linaro.org> wrote:
> On Thursday 21 May 2015 18:01:27 Ard Biesheuvel wrote:
>>
>> You could but I wouldn't recommend it since it may also prevent you
>> from being able to set the boot path, but more importantly, reset and
>> poweroff may also be available only via UEFI Runtime Services on UEFI
>> systems.
>
> Right, makes sense. Another option then could be to disable fpsimd
> support with preempt-rt on real systems, and document this as a known
> source of latency.
>

Unfortunately, that could result in corruption of userland FP/SIMD
context, since the UEFI Runtime Services are allowed to use those
registers, and only need to adhere to the normal AAPCS rules that
stipulate that q8..q15 are callee-save. That would still result in a
25% latency reduction if we only need to preserve q0..q7 and q16..q31



>> So could someone comment on whether virt_efi_set_time() is present in
>> all the problematic traces? Or was it only chosen because it
>> illustrates the underlying problem the best? In the former case, there
>> is an hidden bug that I would like to know about: however, if some
>> time related facility that is used in a performance (or latency)
>> sensitive context ultimately ends up programming the wall clock time
>> in the RTC, then I would expect the same issue to occur on non-UEFI
>> systems as well.
>
> But without UEFI, updating the RTC would cause much less latency,
> because  you don't need to save/restore the fpsimd context, disable
> preemption, or call into a potentially unknown external binary
> blob, the only latency you'd get there is that of a readl/writel
> accessing the RTC register.
>

Yes, that is right. So the UEFI Runtime Service interface is
disproportionately heavy. But that still doesn't explain why it would
make sense to sync the RTC with the system clock often enough that it
violates maximum latency limits, since normally, you read it on boot
and set it on reset/poweroff.

>> One thing I should point out is that this FP/SIMD save/restore is
>> implemented differently depending on whether it is called from process
>> context or from hardirq/softirq context. In the former case,
>> kernel_neon_begin() preserves the userland FP/SIMD context only once,
>> and only restores it right before returning to userland. This way,
>> only the first kernel_neon_begin() and the last kernel_neon_end() call
>> actually induce this latency, and so the average latency could be
>> quite a bit lower than the worst case (although I understand that few
>> people may care about the average in an RT context)
>
> Just for my own interest: in what case do we save/restore the fpsimd
> state from interrupt context?
>

For instance, the IEEE802.11 crypto runs in softirq context, but
typically performs a non-trivial amount of crypto work (unless the
hardware takes care of it). Since the accelerated AES-CCM module is
20x faster than C code, it makes sense to stack/unstack 6 NEON
registers and run it on the NEON.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
  2015-05-22 10:04             ` Ard Biesheuvel
@ 2015-05-22 10:31               ` Arnd Bergmann
  -1 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2015-05-22 10:31 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Anders Roxell, Ayyappa Ch, linux-arm-kernel,
	Sebastian Andrzej Siewior, linux-rt-users, Kevin Hilman

On Friday 22 May 2015 12:04:20 Ard Biesheuvel wrote:
> On 22 May 2015 at 11:46, Arnd Bergmann <arnd@linaro.org> wrote:
> > On Thursday 21 May 2015 18:01:27 Ard Biesheuvel wrote:
> >>
> >> You could but I wouldn't recommend it since it may also prevent you
> >> from being able to set the boot path, but more importantly, reset and
> >> poweroff may also be available only via UEFI Runtime Services on UEFI
> >> systems.
> >
> > Right, makes sense. Another option then could be to disable fpsimd
> > support with preempt-rt on real systems, and document this as a known
> > source of latency.
> >
> 
> Unfortunately, that could result in corruption of userland FP/SIMD
> context, since the UEFI Runtime Services are allowed to use those
> registers, and only need to adhere to the normal AAPCS rules that
> stipulate that q8..q15 are callee-save. That would still result in a
> 25% latency reduction if we only need to preserve q0..q7 and q16..q31

Ah, of course. In some cases, one could probably build the entire
user space without fpsimd support as well, but that obviously
wouldn't be a general recommendation.

> >> One thing I should point out is that this FP/SIMD save/restore is
> >> implemented differently depending on whether it is called from process
> >> context or from hardirq/softirq context. In the former case,
> >> kernel_neon_begin() preserves the userland FP/SIMD context only once,
> >> and only restores it right before returning to userland. This way,
> >> only the first kernel_neon_begin() and the last kernel_neon_end() call
> >> actually induce this latency, and so the average latency could be
> >> quite a bit lower than the worst case (although I understand that few
> >> people may care about the average in an RT context)
> >
> > Just for my own interest: in what case do we save/restore the fpsimd
> > state from interrupt context?
> >
> 
> For instance, the IEEE802.11 crypto runs in softirq context, but
> typically performs a non-trivial amount of crypto work (unless the
> hardware takes care of it). Since the accelerated AES-CCM module is
> 20x faster than C code, it makes sense to stack/unstack 6 NEON
> registers and run it on the NEON.

I see, thanks!

	Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd
@ 2015-05-22 10:31               ` Arnd Bergmann
  0 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2015-05-22 10:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Friday 22 May 2015 12:04:20 Ard Biesheuvel wrote:
> On 22 May 2015 at 11:46, Arnd Bergmann <arnd@linaro.org> wrote:
> > On Thursday 21 May 2015 18:01:27 Ard Biesheuvel wrote:
> >>
> >> You could but I wouldn't recommend it since it may also prevent you
> >> from being able to set the boot path, but more importantly, reset and
> >> poweroff may also be available only via UEFI Runtime Services on UEFI
> >> systems.
> >
> > Right, makes sense. Another option then could be to disable fpsimd
> > support with preempt-rt on real systems, and document this as a known
> > source of latency.
> >
> 
> Unfortunately, that could result in corruption of userland FP/SIMD
> context, since the UEFI Runtime Services are allowed to use those
> registers, and only need to adhere to the normal AAPCS rules that
> stipulate that q8..q15 are callee-save. That would still result in a
> 25% latency reduction if we only need to preserve q0..q7 and q16..q31

Ah, of course. In some cases, one could probably build the entire
user space without fpsimd support as well, but that obviously
wouldn't be a general recommendation.

> >> One thing I should point out is that this FP/SIMD save/restore is
> >> implemented differently depending on whether it is called from process
> >> context or from hardirq/softirq context. In the former case,
> >> kernel_neon_begin() preserves the userland FP/SIMD context only once,
> >> and only restores it right before returning to userland. This way,
> >> only the first kernel_neon_begin() and the last kernel_neon_end() call
> >> actually induce this latency, and so the average latency could be
> >> quite a bit lower than the worst case (although I understand that few
> >> people may care about the average in an RT context)
> >
> > Just for my own interest: in what case do we save/restore the fpsimd
> > state from interrupt context?
> >
> 
> For instance, the IEEE802.11 crypto runs in softirq context, but
> typically performs a non-trivial amount of crypto work (unless the
> hardware takes care of it). Since the accelerated AES-CCM module is
> 20x faster than C code, it makes sense to stack/unstack 6 NEON
> registers and run it on the NEON.

I see, thanks!

	Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2015-05-22 10:31 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-01 15:29 [PATCH] arch/arm64 :Cyclic Test fix in ARM64 fpsimd Ayyappa Ch
2015-05-06 19:38 ` Anders Roxell
2015-05-06 19:38   ` Anders Roxell
2015-05-07 11:09   ` Ayyappa Ch
2015-05-07 11:09     ` Ayyappa Ch
2015-05-08  0:09     ` Anders Roxell
2015-05-08  0:09       ` Anders Roxell
2015-05-11  5:32       ` Ayyappa Ch
2015-05-11  5:32         ` Ayyappa Ch
2015-05-14 16:07         ` Sebastian Andrzej Siewior
2015-05-14 16:07           ` Sebastian Andrzej Siewior
2015-05-16  6:38           ` Ayyappa Ch
2015-05-16  6:38             ` Ayyappa Ch
     [not found]             ` <CAF7YWnw08YK7ogAoLTYXusOvtGCxfDPJW0H+8LyWDzNi_CbR=w@mail.gmail.com>
2015-05-18 21:38               ` Sebastian Andrzej Siewior
2015-05-18 21:38                 ` Sebastian Andrzej Siewior
2015-05-19  0:07                 ` Thomas Gleixner
2015-05-19  0:07                   ` Thomas Gleixner
2015-05-21 13:50 ` Anders Roxell
2015-05-21 13:50   ` Anders Roxell
2015-05-21 15:23   ` Ard Biesheuvel
2015-05-21 15:23     ` Ard Biesheuvel
2015-05-21 15:35     ` Arnd Bergmann
2015-05-21 15:35       ` Arnd Bergmann
2015-05-21 16:01       ` Ard Biesheuvel
2015-05-21 16:01         ` Ard Biesheuvel
2015-05-22  9:46         ` Arnd Bergmann
2015-05-22  9:46           ` Arnd Bergmann
2015-05-22 10:04           ` Ard Biesheuvel
2015-05-22 10:04             ` Ard Biesheuvel
2015-05-22 10:31             ` Arnd Bergmann
2015-05-22 10:31               ` Arnd Bergmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.