All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-17 12:40 ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-17 12:40 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, tglx, Steven Rostedt, Catalin Marinas, Will Deacon,
	linux-arm-kernel

In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but stays on the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.
Add a locallock next to the local_bh_disable(). This fulfills the
requirement that the code is not invoked again in a different context on
the same CPU while it remains preemptible.

The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
is not working on -RT. We don't use NEON in kernel mode on RT right now
but this still should be addressed.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e7226c4c7493..3a5cd1908874 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -38,6 +38,7 @@
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/locallock.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
@@ -235,7 +236,7 @@ static void sve_user_enable(void)
  *    whether TIF_SVE is clear or set, since these are not vector length
  *    dependent.
  */
-
+static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
 /*
  * Update current's FPSIMD/SVE registers from thread_struct.
  *
@@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		local_lock(fpsimd_lock);
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
+		local_unlock(fpsimd_lock);
 		local_bh_enable();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -838,6 +842,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	sve_alloc(current);
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	task_fpsimd_save();
 	fpsimd_to_sve(current);
@@ -849,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	if (test_and_set_thread_flag(TIF_SVE))
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -926,6 +932,7 @@ void fpsimd_flush_thread(void)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
@@ -967,6 +974,7 @@ void fpsimd_flush_thread(void)
 
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -980,7 +988,9 @@ void fpsimd_preserve_current_state(void)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 	task_fpsimd_save();
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -1022,12 +1032,14 @@ void fpsimd_restore_current_state(void)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		task_fpsimd_load();
 		fpsimd_bind_to_cpu();
 	}
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -1042,6 +1054,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1052,6 +1065,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_bind_to_cpu();
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -1116,6 +1130,7 @@ void kernel_neon_begin(void)
 	BUG_ON(!may_use_simd());
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	__this_cpu_write(kernel_neon_busy, true);
 
@@ -1128,6 +1143,7 @@ void kernel_neon_begin(void)
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
 
+	local_unlock(fpsimd_lock);
 	preempt_disable();
 
 	local_bh_enable();
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-17 12:40 ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-17 12:40 UTC (permalink / raw)
  To: linux-arm-kernel

In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but stays on the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.
Add a locallock next to the local_bh_disable(). This fulfills the
requirement that the code is not invoked again in a different context on
the same CPU while it remains preemptible.

The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
is not working on -RT. We don't use NEON in kernel mode on RT right now
but this still should be addressed.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e7226c4c7493..3a5cd1908874 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -38,6 +38,7 @@
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/locallock.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
@@ -235,7 +236,7 @@ static void sve_user_enable(void)
  *    whether TIF_SVE is clear or set, since these are not vector length
  *    dependent.
  */
-
+static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
 /*
  * Update current's FPSIMD/SVE registers from thread_struct.
  *
@@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		local_lock(fpsimd_lock);
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
+		local_unlock(fpsimd_lock);
 		local_bh_enable();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -838,6 +842,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	sve_alloc(current);
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	task_fpsimd_save();
 	fpsimd_to_sve(current);
@@ -849,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	if (test_and_set_thread_flag(TIF_SVE))
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -926,6 +932,7 @@ void fpsimd_flush_thread(void)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
@@ -967,6 +974,7 @@ void fpsimd_flush_thread(void)
 
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -980,7 +988,9 @@ void fpsimd_preserve_current_state(void)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 	task_fpsimd_save();
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -1022,12 +1032,14 @@ void fpsimd_restore_current_state(void)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		task_fpsimd_load();
 		fpsimd_bind_to_cpu();
 	}
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -1042,6 +1054,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 		return;
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1052,6 +1065,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_bind_to_cpu();
 
+	local_unlock(fpsimd_lock);
 	local_bh_enable();
 }
 
@@ -1116,6 +1130,7 @@ void kernel_neon_begin(void)
 	BUG_ON(!may_use_simd());
 
 	local_bh_disable();
+	local_lock(fpsimd_lock);
 
 	__this_cpu_write(kernel_neon_busy, true);
 
@@ -1128,6 +1143,7 @@ void kernel_neon_begin(void)
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
 
+	local_unlock(fpsimd_lock);
 	preempt_disable();
 
 	local_bh_enable();
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-17 12:40 ` Sebastian Andrzej Siewior
@ 2018-05-17 18:19   ` Dave Martin
  -1 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-05-17 18:19 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel,
	Steven Rostedt, tglx, linux-arm-kernel

On Thu, May 17, 2018 at 02:40:06PM +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains the same CPU. This may corrupt the

Also, watch out for [1] which adds more of this stuff for KVM.  This
is not merged yet, but likely to land in v4.18.

> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> Add a locallock around next to the local_bh_disable(). This fulfill the
> requirement that the code is not invoked again in different context on
> the same CPU while it remains preemptible.

Thanks for this.

*WARNING*: My RT-fu is weak to nonexistent, so don't assume that
anything I suggest below is correct without thinking carefully about
it :)

Anyway:

What we're really trying to achieve with the local_bh_disable/enable
stuff is exclusive access to the CPU FPSIMD registers and associated
metadata that tracks who they belong to.

> The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
> is not working on -RT. We don't use NEON in kernel mode on RT right now
> but this still should be addressed.

I think we effectively have two levels of locking here.

At the outer level, we want exclusive access to the FPSIMD registers.
This is what is needed between kernel_neon_begin() and
kernel_neon_end(), and maps onto the preempt_disable()/_enable() done
by these functions.

In context switch critical code, that's insufficient, and we also
need exclusive access to the metadata that tracks which task or context
owns the FPSIMD registers.  This is what the local_bh_disable()/
_enable() achieves.


So does it make sense to have two locks (I'm assuming local locks are
implicitly percpu ?)

static inline void local_fpsimd_context_lock(void)
{
	local_bh_disable();
	local_lock(fpsimd_lock);
	local_lock(fpsimd_context_lock);
}

static inline void local_fpsimd_context_unlock(void)
{
	local_unlock(fpsimd_context_lock);
	local_unlock(fpsimd_lock);
	local_bh_enable();
}


kernel_neon_begin() could then do

	local_fpsimd_context_lock();

	/* ... */

	preempt_disable();
	local_unlock(fpsimd_context_lock);

... with the following in kernel_neon_end():

	local_unlock(fpsimd_lock);
	preempt_enable();


If kernel-mode NEON was considered harmful to RT due to the context
switch overheads, then the above might be overkill.  SVE will be worse
in that regard, and also needs thinking about at some point -- I've not
looked at it from the RT angle at all.

In either case, I think abstracting the lock/unlock sequences out to
make the purpose clearer may be a good idea, even if we just have a
single local lock to manage.


There is one place where I mess with the FPSIMD context with no lock held
because of a need to copy_from_user() straight into the context backing
store (we can't copy_from_user() with preemption disabled...)
I'm not sure whether RT will have any impact on this, but it probably
needs thinking about.

One more comment below...

> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e7226c4c7493..3a5cd1908874 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -38,6 +38,7 @@
>  #include <linux/signal.h>
>  #include <linux/slab.h>
>  #include <linux/sysctl.h>
> +#include <linux/locallock.h>
>  
>  #include <asm/fpsimd.h>
>  #include <asm/cputype.h>
> @@ -235,7 +236,7 @@ static void sve_user_enable(void)
>   *    whether TIF_SVE is clear or set, since these are not vector length
>   *    dependent.
>   */
> -
> +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
>  /*
>   * Update current's FPSIMD/SVE registers from thread_struct.
>   *
> @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		local_lock(fpsimd_lock);
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
> +		local_unlock(fpsimd_lock);

Is this misordered against local_bh_enable(), or doesn't it matter?

>  		local_bh_enable();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -838,6 +842,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
>  	sve_alloc(current);
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	task_fpsimd_save();
>  	fpsimd_to_sve(current);
> @@ -849,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
>  	if (test_and_set_thread_flag(TIF_SVE))
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -926,6 +932,7 @@ void fpsimd_flush_thread(void)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
>  	fpsimd_flush_task_state(current);
> @@ -967,6 +974,7 @@ void fpsimd_flush_thread(void)
>  
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -980,7 +988,9 @@ void fpsimd_preserve_current_state(void)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  	task_fpsimd_save();
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -1022,12 +1032,14 @@ void fpsimd_restore_current_state(void)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>  		task_fpsimd_load();
>  		fpsimd_bind_to_cpu();
>  	}
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -1042,6 +1054,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
>  	if (system_supports_sve() && test_thread_flag(TIF_SVE))
> @@ -1052,6 +1065,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
>  		fpsimd_bind_to_cpu();
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -1116,6 +1130,7 @@ void kernel_neon_begin(void)
>  	BUG_ON(!may_use_simd());
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	__this_cpu_write(kernel_neon_busy, true);
>  
> @@ -1128,6 +1143,7 @@ void kernel_neon_begin(void)
>  	/* Invalidate any task state remaining in the fpsimd regs: */
>  	fpsimd_flush_cpu_state();
>  
> +	local_unlock(fpsimd_lock);
>  	preempt_disable();
>  
>  	local_bh_enable();

The general approach looks reasonable, based on my guesswork about what
the local_lock/_unlocks are doing here.

Cheers
---Dave

[...]

[1] [PATCH v7 00/16] KVM: arm64: Optimise FPSIMD context switching
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-May/576595.html

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-17 18:19   ` Dave Martin
  0 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-05-17 18:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, May 17, 2018 at 02:40:06PM +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains the same CPU. This may corrupt the

Also, watch out for [1] which adds more of this stuff for KVM.  This
is not merged yet, but likely to land in v4.18.

> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> Add a locallock around next to the local_bh_disable(). This fulfill the
> requirement that the code is not invoked again in different context on
> the same CPU while it remains preemptible.

Thanks for this.

*WARNING*: My RT-fu is weak to nonexistent, so don't assume that
anything I suggest below is correct without thinking carefully about
it :)

Anyway:

What we're really trying to achieve with the local_bh_disable/enable
stuff is exclusive access to the CPU FPSIMD registers and associated
metadata that tracks who they belong to.

> The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
> is not working on -RT. We don't use NEON in kernel mode on RT right now
> but this still should be addressed.

I think we effectively have two levels of locking here.

At the outer level, we want exclusive access to the FPSIMD registers.
This is what is needed between kernel_neon_begin() and
kernel_neon_end(), and maps onto the preempt_disable()/_enable() done
by these functions.

In context switch critical code, that's insufficient, and we also
need exclusive access to the metadata that tracks which task or context
owns the FPSIMD registers.  This is what the local_bh_disable()/
_enable() achieves.


So does it make sense to have two locks (I'm assuming local locks are
implicitly percpu ?)

static inline void local_fpsimd_context_lock(void)
{
	local_bh_disable();
	local_lock(fpsimd_lock);
	local_lock(fpsimd_context_lock);
}

static inline void local_fpsimd_context_unlock(void)
{
	local_unlock(fpsimd_context_lock);
	local_unlock(fpsimd_lock);
	local_bh_enable();
}


kernel_neon_begin() could then do

	local_fpsimd_context_lock();

	/* ... */

	preempt_disable();
	local_unlock(fpsimd_context_lock);

... with the following in kernel_neon_end():

	local_unlock(fpsimd_lock);
	preempt_enable();


If kernel-mode NEON was considered harmful to RT due to the context
switch overheads, then the above might be overkill.  SVE will be worse
in that regard, and also needs thinking about at some point -- I've not
looked at it from the RT angle at all.

In either case, I think abstracting the lock/unlock sequences out to
make the purpose clearer may be a good idea, even if we just have a
single local lock to manage.


There is one place where I mess with the FPSIMD context with no lock held
because of a need to copy_from_user() straight into the context backing
store (we can't copy_from_user() with preemption disabled...)
I'm not sure whether RT will have any impact on this, but it probably
needs thinking about.

One more comment below...

> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e7226c4c7493..3a5cd1908874 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -38,6 +38,7 @@
>  #include <linux/signal.h>
>  #include <linux/slab.h>
>  #include <linux/sysctl.h>
> +#include <linux/locallock.h>
>  
>  #include <asm/fpsimd.h>
>  #include <asm/cputype.h>
> @@ -235,7 +236,7 @@ static void sve_user_enable(void)
>   *    whether TIF_SVE is clear or set, since these are not vector length
>   *    dependent.
>   */
> -
> +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
>  /*
>   * Update current's FPSIMD/SVE registers from thread_struct.
>   *
> @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		local_lock(fpsimd_lock);
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
> +		local_unlock(fpsimd_lock);

Is this misordered against local_bh_enable(), or doesn't it matter?

>  		local_bh_enable();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -838,6 +842,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
>  	sve_alloc(current);
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	task_fpsimd_save();
>  	fpsimd_to_sve(current);
> @@ -849,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
>  	if (test_and_set_thread_flag(TIF_SVE))
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -926,6 +932,7 @@ void fpsimd_flush_thread(void)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
>  	fpsimd_flush_task_state(current);
> @@ -967,6 +974,7 @@ void fpsimd_flush_thread(void)
>  
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -980,7 +988,9 @@ void fpsimd_preserve_current_state(void)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  	task_fpsimd_save();
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -1022,12 +1032,14 @@ void fpsimd_restore_current_state(void)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>  		task_fpsimd_load();
>  		fpsimd_bind_to_cpu();
>  	}
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -1042,6 +1054,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>  		return;
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
>  	if (system_supports_sve() && test_thread_flag(TIF_SVE))
> @@ -1052,6 +1065,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
>  		fpsimd_bind_to_cpu();
>  
> +	local_unlock(fpsimd_lock);
>  	local_bh_enable();
>  }
>  
> @@ -1116,6 +1130,7 @@ void kernel_neon_begin(void)
>  	BUG_ON(!may_use_simd());
>  
>  	local_bh_disable();
> +	local_lock(fpsimd_lock);
>  
>  	__this_cpu_write(kernel_neon_busy, true);
>  
> @@ -1128,6 +1143,7 @@ void kernel_neon_begin(void)
>  	/* Invalidate any task state remaining in the fpsimd regs: */
>  	fpsimd_flush_cpu_state();
>  
> +	local_unlock(fpsimd_lock);
>  	preempt_disable();
>  
>  	local_bh_enable();

The general approach looks reasonable, based on my guesswork about what
the local_lock/_unlocks are doing here.

Cheers
---Dave

[...]

[1] [PATCH v7 00/16] KVM: arm64: Optimise FPSIMD context switching
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-May/576595.html

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-17 18:19   ` Dave Martin
@ 2018-05-18 12:46     ` Dave Martin
  -1 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-05-18 12:46 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel,
	Steven Rostedt, tglx, linux-arm-kernel

On Thu, May 17, 2018 at 07:19:43PM +0100, Dave Martin wrote:

[...]

> kernel_neon_begin() could then do
> 
> 	local_fpsimd_context_lock();
> 
> 	/* ... */
> 
> 	preempt_disable();
> 	local_unlock(fpsimd_context_lock);
> 
> ... with the following in kernel_neon_end():
> 
> 	local_unlock(fpsimd_lock);
> 	preempt_enable();
> 
> 
> If kernel-mode NEON was considered harmful to RT due to the context
> switch overheads, then the above might be overkill.  SVE will be worse
> in that regard, and also needs thinking about at some point -- I've not
> looked at if from the RT angle at all.

Hmmm, !KERNEL_MODE_NEON breaks EFI, so this probably does want looking
at.  Ard's recent rework to enable voluntary preemption in the crypto
backends for arm64 [1] should reduce the fpsimd_lock blackouts, but it
still depends on the backends playing nice.

Cheers
---Dave

[1] [PATCH resend 00/10] crypto: arm64 - play nice with CONFIG_PREEMPT
lists.infradead.org/pipermail/linux-arm-kernel/2018-April/574819.html

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-18 12:46     ` Dave Martin
  0 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-05-18 12:46 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, May 17, 2018 at 07:19:43PM +0100, Dave Martin wrote:

[...]

> kernel_neon_begin() could then do
> 
> 	local_fpsimd_context_lock();
> 
> 	/* ... */
> 
> 	preempt_disable();
> 	local_unlock(fpsimd_context_lock);
> 
> ... with the following in kernel_neon_end():
> 
> 	local_unlock(fpsimd_lock);
> 	preempt_enable();
> 
> 
> If kernel-mode NEON was considered harmful to RT due to the context
> switch overheads, then the above might be overkill.  SVE will be worse
> in that regard, and also needs thinking about at some point -- I've not
> looked at if from the RT angle at all.

Hmmm, !KERNEL_MODE_NEON breaks EFI, so this probably does want looking
at.  Ard's recent rework to enable voluntary preemption in the crypto
backends for arm64 [1] should reduce the fpsimd_lock blackouts, but it
still depends on the backends playing nice.

Cheers
---Dave

[1] [PATCH resend 00/10] crypto: arm64 - play nice with CONFIG_PREEMPT
lists.infradead.org/pipermail/linux-arm-kernel/2018-April/574819.html

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-17 12:40 ` Sebastian Andrzej Siewior
@ 2018-05-22 17:10   ` Steven Rostedt
  -1 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-05-22 17:10 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On Thu, 17 May 2018 14:40:06 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
>  /*
>   * Update current's FPSIMD/SVE registers from thread_struct.
>   *
> @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		local_lock(fpsimd_lock);
>  		local_bh_disable();

I'm surprised that we don't have a "local_lock_bh()"?

-- Steve

>  
>  		task_fpsimd_save();

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-22 17:10   ` Steven Rostedt
  0 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-05-22 17:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 17 May 2018 14:40:06 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
>  /*
>   * Update current's FPSIMD/SVE registers from thread_struct.
>   *
> @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		local_lock(fpsimd_lock);
>  		local_bh_disable();

I'm surprised that we don't have a "local_lock_bh()"?

-- Steve

>  
>  		task_fpsimd_save();

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-22 17:10   ` Steven Rostedt
@ 2018-05-22 17:21     ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-22 17:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:
> On Thu, 17 May 2018 14:40:06 +0200
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> >  /*
> >   * Update current's FPSIMD/SVE registers from thread_struct.
> >   *
> > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> >  	 * non-SVE thread.
> >  	 */
> >  	if (task == current) {
> > +		local_lock(fpsimd_lock);
> >  		local_bh_disable();
> 
> I'm surprised that we don't have a "local_lock_bh()"?

right. Like the last time when we introduced a global lock with no
locking context? 

> -- Steve
> 
> >  
> >  		task_fpsimd_save();

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-22 17:21     ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-22 17:21 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:
> On Thu, 17 May 2018 14:40:06 +0200
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> >  /*
> >   * Update current's FPSIMD/SVE registers from thread_struct.
> >   *
> > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> >  	 * non-SVE thread.
> >  	 */
> >  	if (task == current) {
> > +		local_lock(fpsimd_lock);
> >  		local_bh_disable();
> 
> I'm surprised that we don't have a "local_lock_bh()"?

right. Like the last time when we introduced a global lock with no
locking context? 

> -- Steve
> 
> >  
> >  		task_fpsimd_save();

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-22 17:21     ` Sebastian Andrzej Siewior
@ 2018-05-22 17:24       ` Steven Rostedt
  -1 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-05-22 17:24 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On Tue, 22 May 2018 19:21:16 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:
> > On Thu, 17 May 2018 14:40:06 +0200
> > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >   
> > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > >  /*
> > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > >   *
> > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > >  	 * non-SVE thread.
> > >  	 */
> > >  	if (task == current) {
> > > +		local_lock(fpsimd_lock);
> > >  		local_bh_disable();  
> > 
> > I'm surprised that we don't have a "local_lock_bh()"?  
> 
> right. Like the last time when we introduced a global lock with no
> locking context? 
> 

I meant, we could have local_lock_bh(fpsimd_lock); that would turn into
a local_bh_disable() when !PREEMPT_RT.

-- Steve

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-22 17:24       ` Steven Rostedt
  0 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-05-22 17:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 22 May 2018 19:21:16 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:
> > On Thu, 17 May 2018 14:40:06 +0200
> > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >   
> > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > >  /*
> > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > >   *
> > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > >  	 * non-SVE thread.
> > >  	 */
> > >  	if (task == current) {
> > > +		local_lock(fpsimd_lock);
> > >  		local_bh_disable();  
> > 
> > I'm surprised that we don't have a "local_lock_bh()"?  
> 
> right. Like the last time when we introduced a global lock with no
> locking context? 
> 

I meant, we could have local_lock_bh(fpsimd_lock); that would turn into
a local_bh_disable() when !PREEMPT_RT.

-- Steve

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-22 17:24       ` Steven Rostedt
@ 2018-05-22 17:33         ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-22 17:33 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On 2018-05-22 13:24:29 [-0400], Steven Rostedt wrote:
> On Tue, 22 May 2018 19:21:16 +0200
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:
> > > On Thu, 17 May 2018 14:40:06 +0200
> > > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > >   
> > > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > > >  /*
> > > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > > >   *
> > > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > > >  	 * non-SVE thread.
> > > >  	 */
> > > >  	if (task == current) {
> > > > +		local_lock(fpsimd_lock);
> > > >  		local_bh_disable();  
> > > 
> > > I'm surprised that we don't have a "local_lock_bh()"?  
> > 
> > right. Like the last time when we introduced a global lock with no
> > locking context? 
> > 
> 
> I meant, we could have local_lock_bh(fpsimd_lock); that would turn into
> a local_bh_disable() when !PREEMPT_RT.

Oh that part. That could be possible I guess. I need to look into the
second part which disables preemption while the FPU is taken.

> -- Steve

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-22 17:33         ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-22 17:33 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-05-22 13:24:29 [-0400], Steven Rostedt wrote:
> On Tue, 22 May 2018 19:21:16 +0200
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:
> > > On Thu, 17 May 2018 14:40:06 +0200
> > > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > >   
> > > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > > >  /*
> > > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > > >   *
> > > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > > >  	 * non-SVE thread.
> > > >  	 */
> > > >  	if (task == current) {
> > > > +		local_lock(fpsimd_lock);
> > > >  		local_bh_disable();  
> > > 
> > > I'm surprised that we don't have a "local_lock_bh()"?  
> > 
> > right. Like the last time when we introduced a global lock with no
> > locking context? 
> > 
> 
> I meant, we could have local_lock_bh(fpsimd_lock); that would turn into
> a local_bh_disable() when !PREEMPT_RT.

Oh that part. That could be possible I guess. I need to look into the
second part which disables preemption while the FPU is taken.

> -- Steve

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-17 18:19   ` Dave Martin
@ 2018-05-23 14:31     ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-23 14:31 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel,
	Steven Rostedt, tglx, linux-arm-kernel

On 2018-05-17 19:19:43 [+0100], Dave Martin wrote:
> On Thu, May 17, 2018 at 02:40:06PM +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains the same CPU. This may corrupt the
> 
> Also, watch out for [1] which adds more of this stuff for KVM.  This is
> not merged yet, but likely to land in v4.18.

yay. I will try to keep this in mind while moving forward.

> Anyway:
> 
> What we're really trying to achieve with the local_bh_disable/enable
> stuff is exclusive access to the CPU FPSIMD registers and associated
> metadata that tracks who they belong to.

I assumed this.

> > The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
> > is not working on -RT. We don't use NEON in kernel mode on RT right now
> > but this still should be addressed.
> 
> I think we effectively have two levels of locking here.
> 
> At the outer level, we want exclusive access to the FPSIMD registers.
> This is what is needed between kernel_neon_begin() and
> kernel_neon_end(), and maps onto the preempt_disable()/_enable() done
> by these functions.
> 
> In context switch critical code, that's insufficient, and we also
> need exclusive access to the metadata that tracks which task or context
> owns the FPSIMD registers.  This is what the local_bh_disable()/
> _enable() achieves.
> 
> 
> So does it make sense to have two locks (I'm assuming local locks are
> implicitly percpu ?)
> 
> static inline void local_fpsimd_context_lock(void)
> {
> 	local_bh_disable();
> 	local_lock(fpsimd_lock);
> 	local_lock(fpsimd_context_lock);
> }
> 
> static inline void local_fpsimd_context_unlock(void)
> {
> 	local_unlock(fpsimd_context_lock);
> 	local_unlock(fpsimd_lock);
> 	local_bh_enable();
> }
> 
> 
> kernel_neon_begin() could then do
> 
> 	local_fpsimd_context_lock();
> 
> 	/* ... */
> 
> 	preempt_disable();
> 	local_unlock(fpsimd_context_lock);
> 
> ... with the following in kernel_neon_end():
> 
> 	local_unlock(fpsimd_lock);
> 	preempt_enable();
> 
> 
> If kernel-mode NEON was considered harmful to RT due to the context
> switch overheads, then the above might be overkill.  SVE will be worse
> in that regard, and also needs thinking about at some point -- I've not
> looked at it from the RT angle at all.
> 
> In either case, I think abstracting the lock/unlock sequences out to
> make the purpose clearer may be a good idea, even if we just have a
> single local lock to manage.

So the two locks you suggested make sense. However I would need to
replace this preempt_disable() with one such lock. A local_lock() on -RT
is a per-CPU spin_lock. RT's local_lock gets currently transformed into
preempt_disable() on !RT configs.

> There is one place where I mess with the FPSIMD context no lock held
> because of a need to copy_from_user() straight into the context backing
> store (we can't copy_from_user() with preemption disabled...)
> I'm not sure whether RT will have any impact on this, but it probably
> needs thinking about.

if no locks are held and the task can be preempted then it also might
happen on PREEMPT config. But copy_from_user() doesn't sound like
something that will go straight to HW.

> One more comment below...
> 
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> >  arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index e7226c4c7493..3a5cd1908874 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -38,6 +38,7 @@
> >  #include <linux/signal.h>
> >  #include <linux/slab.h>
> >  #include <linux/sysctl.h>
> > +#include <linux/locallock.h>
> >  
> >  #include <asm/fpsimd.h>
> >  #include <asm/cputype.h>
> > @@ -235,7 +236,7 @@ static void sve_user_enable(void)
> >   *    whether TIF_SVE is clear or set, since these are not vector length
> >   *    dependent.
> >   */
> > -
> > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> >  /*
> >   * Update current's FPSIMD/SVE registers from thread_struct.
> >   *
> > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> >  	 * non-SVE thread.
> >  	 */
> >  	if (task == current) {
> > +		local_lock(fpsimd_lock);
> >  		local_bh_disable();
> >  
> >  		task_fpsimd_save();
> > @@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
> >  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> >  		sve_to_fpsimd(task);
> >  
> > -	if (task == current)
> > +	if (task == current) {
> > +		local_unlock(fpsimd_lock);
> 
> Is this misordered against local_bh_enable(), or doesn't it matter?

It actually is but it does not matter. On RT local_bh_disable() is
turned into migrate_disable(), which in turn disables the migration of the
task to another CPU (while it is still preemptible). This
migrate_disable() is also part of local_lock(). Maybe I will add a
local_lock_bh() and replace this local_bh_disable() with it so it will be
better :)

> >  		local_bh_enable();
> > +	}

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-23 14:31     ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-23 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-05-17 19:19:43 [+0100], Dave Martin wrote:
> On Thu, May 17, 2018 at 02:40:06PM +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains the same CPU. This may corrupt the
> 
> Also, watch out for [1] which adds more of this stuff for KVM.  This is
> not merged yet, but likely to land in v4.18.

yay. I will try to keep this in mind while moving forward.

> Anyway:
> 
> What we're really trying to achieve with the local_bh_disable/enable
> stuff is exclusive access to the CPU FPSIMD registers and associated
> metadata that tracks who they belong to.

I assumed this.

> > The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
> > is not working on -RT. We don't use NEON in kernel mode on RT right now
> > but this still should be addressed.
> 
> I think we effectively have two levels of locking here.
> 
> At the outer level, we want exclusive access to the FPSIMD registers.
> This is what is needed between kernel_neon_begin() and
> kernel_neon_end(), and maps onto the preempt_disable()/_enable() done
> by these functions.
> 
> In context switch critical code, that's insufficient, and we also
> need exclusive access to the metadata that tracks which task or context
> owns the FPSIMD registers.  This is what the local_bh_disable()/
> _enable() achieves.
> 
> 
> So does it make sense to have two locks (I'm assuming local locks are
> implicitly percpu ?)
> 
> static inline void local_fpsimd_context_lock(void)
> {
> 	local_bh_disable();
> 	local_lock(fpsimd_lock);
> 	local_lock(fpsimd_context_lock);
> }
> 
> static inline void local_fpsimd_context_unlock(void)
> {
> 	local_unlock(fpsimd_context_lock);
> 	local_unlock(fpsimd_lock);
> 	local_bh_enable();
> }
> 
> 
> kernel_neon_begin() could then do
> 
> 	local_fpsimd_context_lock();
> 
> 	/* ... */
> 
> 	preempt_disable();
> 	local_unlock(fpsimd_context_lock);
> 
> ... with the following in kernel_neon_end():
> 
> 	local_unlock(fpsimd_lock);
> 	preempt_enable();
> 
> 
> If kernel-mode NEON was considered harmful to RT due to the context
> switch overheads, then the above might be overkill.  SVE will be worse
> in that regard, and also needs thinking about at some point -- I've not
> looked at it from the RT angle at all.
> 
> In either case, I think abstracting the lock/unlock sequences out to
> make the purpose clearer may be a good idea, even if we just have a
> single local lock to manage.

So the two locks you suggested make sense. However I would need to
replace this preempt_disable() with one such lock. A local_lock() on -RT
is a per-CPU spin_lock. RT's local_lock gets currently transformed into
preempt_disable() on !RT configs.

> There is one place where I mess with the FPSIMD context no lock held
> because of a need to copy_from_user() straight into the context backing
> store (we can't copy_from_user() with preemption disabled...)
> I'm not sure whether RT will have any impact on this, but it probably
> needs thinking about.

if no locks are held and the task can be preempted then it also might
happen on PREEMPT config. But copy_from_user() doesn't sound like
something that will go straight to HW.

> One more comment below...
> 
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> >  arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index e7226c4c7493..3a5cd1908874 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -38,6 +38,7 @@
> >  #include <linux/signal.h>
> >  #include <linux/slab.h>
> >  #include <linux/sysctl.h>
> > +#include <linux/locallock.h>
> >  
> >  #include <asm/fpsimd.h>
> >  #include <asm/cputype.h>
> > @@ -235,7 +236,7 @@ static void sve_user_enable(void)
> >   *    whether TIF_SVE is clear or set, since these are not vector length
> >   *    dependent.
> >   */
> > -
> > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> >  /*
> >   * Update current's FPSIMD/SVE registers from thread_struct.
> >   *
> > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> >  	 * non-SVE thread.
> >  	 */
> >  	if (task == current) {
> > +		local_lock(fpsimd_lock);
> >  		local_bh_disable();
> >  
> >  		task_fpsimd_save();
> > @@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
> >  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> >  		sve_to_fpsimd(task);
> >  
> > -	if (task == current)
> > +	if (task == current) {
> > +		local_unlock(fpsimd_lock);
> 
> Is this misordered against local_bh_enable(), or doesn't it matter?

It actually is but it does not matter. On RT local_bh_disable() is
turned into migrate_disable(), which in turn disables the migration of the
task to another CPU (while it is still preemptible). This
migrate_disable() is also part of local_lock(). Maybe I will add a
local_lock_bh() and replace this local_bh_disable() with it so it will be
better :)

> >  		local_bh_enable();
> > +	}

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-18 12:46     ` Dave Martin
@ 2018-05-23 14:34       ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-23 14:34 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel,
	Steven Rostedt, tglx, linux-arm-kernel

On 2018-05-18 13:46:36 [+0100], Dave Martin wrote:
> On Thu, May 17, 2018 at 07:19:43PM +0100, Dave Martin wrote:
> 
> [...]
> 
> > kernel_neon_begin() could then do
> > 
> > 	local_fpsimd_context_lock();
> > 
> > 	/* ... */
> > 
> > 	preempt_disable();
> > 	local_unlock(fpsimd_context_lock);
> > 
> > ... with the following in kernel_neon_end():
> > 
> > 	local_unlock(fpsimd_lock);
> > 	preempt_enable();
> > 
> > 
> > If kernel-mode NEON was considered harmful to RT due to the context
> > switch overheads, then the above might be overkill.  SVE will be worse
> > in that regard, and also needs thinking about at some point -- I've not
> > looked at it from the RT angle at all.
> 
> Hmmm, !KERNEL_MODE_NEON breaks EFI, so this probably does want looking
> at.  Ard's recent rework to enable voluntary preemption in the crypto
> backends for arm64 [1] should reduce the fpsimd_lock blackouts, but it
> still depends on the backends playing nice.

oh. I've seen the series, I more or less asked for this and I am glad
for it. I will try to unbreak this EFI problem then…
I remember the good old days when the boot loader was gone after it
jumped to the kernel…

> Cheers
> ---Dave

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-23 14:34       ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-23 14:34 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-05-18 13:46:36 [+0100], Dave Martin wrote:
> On Thu, May 17, 2018 at 07:19:43PM +0100, Dave Martin wrote:
> 
> [...]
> 
> > kernel_neon_begin() could then do
> > 
> > 	local_fpsimd_context_lock();
> > 
> > 	/* ... */
> > 
> > 	preempt_disable();
> > 	local_unlock(fpsimd_context_lock);
> > 
> > ... with the following in kernel_neon_end():
> > 
> > 	local_unlock(fpsimd_lock);
> > 	preempt_enable();
> > 
> > 
> > If kernel-mode NEON was considered harmful to RT due to the context
> > switch overheads, then the above might be overkill.  SVE will be worse
> > in that regard, and also needs thinking about at some point -- I've not
> > looked at it from the RT angle at all.
> 
> Hmmm, !KERNEL_MODE_NEON breaks EFI, so this probably does want looking
> at.  Ard's recent rework to enable voluntary preemption in the crypto
> backends for arm64 [1] should reduce the fpsimd_lock blackouts, but it
> still depends on the backends playing nice.

oh. I've seen the series, I more or less asked for this and I am glad
for it. I will try to unbreak this EFI problem then…
I remember the good old days when the boot loader was gone after it
jumped to the kernel…

> Cheers
> ---Dave

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-23 14:31     ` Sebastian Andrzej Siewior
@ 2018-05-23 14:55       ` Dave Martin
  -1 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-05-23 14:55 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel,
	Steven Rostedt, tglx, linux-arm-kernel

On Wed, May 23, 2018 at 04:31:56PM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-05-17 19:19:43 [+0100], Dave Martin wrote:
> > On Thu, May 17, 2018 at 02:40:06PM +0200, Sebastian Andrzej Siewior wrote:
> > > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > > code disables BH and expects that it is not preemptible. On -RT the
> > > task remains preemptible but remains the same CPU. This may corrupt the
> > 
> > Also, watch out for [1] which adds more of this stuff for KVM.  This is
> > not merged yet, but likely to land in v4.18.
> 
> yay. I will try to keep this in mind while moving forward.
> 
> > Anyway:
> > 
> > What we're really trying to achieve with the local_bh_disable/enable
> > stuff is exclusive access to the CPU FPSIMD registers and associated
> > metadata that tracks who they belong to.
> 
> I assumed this.

Yup, it's a common pattern cross-arch.

> > > The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
> > > is not working on -RT. We don't use NEON in kernel mode on RT right now
> > > but this still should be addressed.
> > 
> > I think we effectively have two levels of locking here.
> > 
> > At the outer level, we want exclusive access to the FPSIMD registers.
> > This is what is needed between kernel_neon_begin() and
> > kernel_neon_end(), and maps onto the preempt_disable()/_enable() done
> > by these functions.
> > 
> > In context switch critical code, that's insufficient, and we also
> > need exclusive access to the metadata that tracks which task or context
> > owns the FPSIMD registers.  This is what the local_bh_disable()/
> > _enable() achieves.
> > 
> > 
> > So does it make sense to have two locks (I'm assuming local locks are
> > implicitly percpu ?)
> > 
> > static inline void local_fpsimd_context_lock(void)
> > {
> > 	local_bh_disable();
> > 	local_lock(fpsimd_lock);
> > 	local_lock(fpsimd_context_lock);
> > }
> > 
> > static inline void local_fpsimd_context_unlock(void)
> > {
> > 	local_unlock(fpsimd_context_lock);
> > 	local_unlock(fpsimd_lock);
> > 	local_bh_enable();
> > }
> > 
> > 
> > kernel_neon_begin() could then do
> > 
> > 	local_fpsimd_context_lock();
> > 
> > 	/* ... */
> > 
> > 	preempt_disable();
> > 	local_unlock(fpsimd_context_lock);
> > 
> > ... with the following in kernel_neon_end():
> > 
> > 	local_unlock(fpsimd_lock);
> > 	preempt_enable();
> > 
> > 
> > If kernel-mode NEON was considered harmful to RT due to the context
> > switch overheads, then the above might be overkill.  SVE will be worse
> > in that regard, and also needs thinking about at some point -- I've not
> > looked at it from the RT angle at all.
> > 
> > In either case, I think abstracting the lock/unlock sequences out to
> > make the purpose clearer may be a good idea, even if we just have a
> > single local lock to manage.
> 
> So the two locks you suggested make sense. However I would need to
> replace this preempt_disable() with one such lock. A local_lock() on -RT
> is a per-CPU spin_lock. RT's local_lock gets currently transformed into
> preempt_disable() on !RT configs.

Thinking about this again, I'm guessing it only really makes sense to
have two local locks if there are two independent reasons to inhibit
migration.

There is only one such reason here: preempt_disable() for the purpose
of using the FPSIMD registers, where local_bh_disable() implies this
and also inhibits local softirqs -- which I'm guessing works just
the same with RT.


So maybe there should really be only one local lock as you suggest.

This gives:

static inline void local_fpsimd_context_lock(void)
{
	local_lock(fpsimd_lock);
	local_bh_disable();
}

static inline void local_fpsimd_context_unlock(void)
{
	local_bh_enable();
	local_unlock(fpsimd_lock);
}

kernel_neon_begin(...)
{
	local_lock(fpsimd_lock);
	local_bh_disable();

	/* ... */

	local_bh_enable();
}

kernel_neon_end(...)
{
	/* ... */

	local_unlock(fpsimd_lock);
}

If the local_{,un}lock() gets transmuted to preempt_{dis,en}able()
for !RT, then this seems to map on to what we currently have.

(Still guessing about how RT works here, so a health warning is
implied.)


If you abstract things this way, can you please split the non-RT changes
into a separate patch and Cc me?  That would be a nice cleanup for
mainline rather than just having the bare local_bh_ and preempt_ stuff
everywhere.

> 
> > There is one place where I mess with the FPSIMD context no lock held
> > because of a need to copy_from_user() straight into the context backing
> > store (we can't copy_from_user() with preemption disabled...)
> > I'm not sure whether RT will have any impact on this, but it probably
> > needs thinking about.
> 
> if no locks are held and the task can be preempted then it also might
> happen on PREEMPT config. But copy_from_user() doesn't sound like
> something that will go straight to HW.

We're just copying to memory, so I guess so long as program order is
respected when the task is migrated (which is ensured via heavy barriers
somewhere in the scheduler and/or arch context switch code IIRC) then I
think there should be no problem).  This is a prerequisite for
preemptive migration to work at all, not just on RT.  So I suppose
there's no new problem here.

> 
> > One more comment below...
> > 
> > > 
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > ---
> > >  arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
> > >  1 file changed, 18 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > > index e7226c4c7493..3a5cd1908874 100644
> > > --- a/arch/arm64/kernel/fpsimd.c
> > > +++ b/arch/arm64/kernel/fpsimd.c
> > > @@ -38,6 +38,7 @@
> > >  #include <linux/signal.h>
> > >  #include <linux/slab.h>
> > >  #include <linux/sysctl.h>
> > > +#include <linux/locallock.h>
> > >  
> > >  #include <asm/fpsimd.h>
> > >  #include <asm/cputype.h>
> > > @@ -235,7 +236,7 @@ static void sve_user_enable(void)
> > >   *    whether TIF_SVE is clear or set, since these are not vector length
> > >   *    dependent.
> > >   */
> > > -
> > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > >  /*
> > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > >   *
> > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > >  	 * non-SVE thread.
> > >  	 */
> > >  	if (task == current) {
> > > +		local_lock(fpsimd_lock);
> > >  		local_bh_disable();
> > >  
> > >  		task_fpsimd_save();
> > > @@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
> > >  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> > >  		sve_to_fpsimd(task);
> > >  
> > > -	if (task == current)
> > > +	if (task == current) {
> > > +		local_unlock(fpsimd_lock);
> > 
> > Is this misordered against local_bh_enable(), or doesn't it matter?
> 
> It actually is but it does not matter. On RT local_bh_disable() is
> turned into migrate_disable(), which in turn disables the migration of the
> task to another CPU (while it is still preemptible). This
> migrate_disable() is also part of local_lock(). Maybe I will add a
> local_lock_bh() and replace this local_bh_disable() with it so it will be
> better :)

Fair enough.

Cheers
---Dave

> 
> > >  		local_bh_enable();
> > > +	}

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-05-23 14:55       ` Dave Martin
  0 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-05-23 14:55 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, May 23, 2018 at 04:31:56PM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-05-17 19:19:43 [+0100], Dave Martin wrote:
> > On Thu, May 17, 2018 at 02:40:06PM +0200, Sebastian Andrzej Siewior wrote:
> > > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > > code disables BH and expects that it is not preemptible. On -RT the
> > > task remains preemptible but remains the same CPU. This may corrupt the
> > 
> > Also, watch out for [1] which adds more of this stuff for KVM.  This is
> > not merged yet, but likely to land in v4.18.
> 
> yay. I will try to keep this in mind while moving forward.
> 
> > Anyway:
> > 
> > What we're really trying to achieve with the local_bh_disable/enable
> > stuff is exclusive access to the CPU FPSIMD registers and associated
> > metadata that tracks who they belong to.
> 
> I assumed this.

Yup, it's a common pattern cross-arch.

> > > The preempt_disable() + local_bh_enable() combo in kernel_neon_begin()
> > > is not working on -RT. We don't use NEON in kernel mode on RT right now
> > > but this still should be addressed.
> > 
> > I think we effectively have two levels of locking here.
> > 
> > At the outer level, we want exclusive access to the FPSIMD registers.
> > This is what is needed between kernel_neon_begin() and
> > kernel_neon_end(), and maps onto the preempt_disable()/_enable() done
> > by these functions.
> > 
> > In context switch critical code, that's insufficient, and we also
> > need exclusive access to the metadata that tracks which task or context
> > owns the FPSIMD registers.  This is what the local_bh_disable()/
> > _enable() achieves.
> > 
> > 
> > So does it make sense to have two locks (I'm assuming local locks are
> > implicitly percpu ?)
> > 
> > static inline void local_fpsimd_context_lock(void)
> > {
> > 	local_bh_disable();
> > 	local_lock(fpsimd_lock);
> > 	local_lock(fpsimd_context_lock);
> > }
> > 
> > static inline void local_fpsimd_context_unlock(void)
> > {
> > 	local_unlock(fpsimd_context_lock);
> > 	local_unlock(fpsimd_lock);
> > 	local_bh_enable();
> > }
> > 
> > 
> > kernel_neon_begin() could then do
> > 
> > 	local_fpsimd_context_lock();
> > 
> > 	/* ... */
> > 
> > 	preempt_disable();
> > 	local_unlock(fpsimd_context_lock);
> > 
> > ... with the following in kernel_neon_end():
> > 
> > 	local_unlock(fpsimd_lock);
> > 	preempt_enable();
> > 
> > 
> > If kernel-mode NEON was considered harmful to RT due to the context
> > switch overheads, then the above might be overkill.  SVE will be worse
> > in that regard, and also needs thinking about at some point -- I've not
> > looked at it from the RT angle at all.
> > 
> > In either case, I think abstracting the lock/unlock sequences out to
> > make the purpose clearer may be a good idea, even if we just have a
> > single local lock to manage.
> 
> So the two locks you suggested make sense. However I would need to
> replace this preempt_disable() with one such lock. A local_lock() on -RT
> is a per-CPU spin_lock. RT's local_lock gets currently transformed into
> preempt_disable() on !RT configs.

Thinking about this again, I'm guessing it only really makes sense to
have two local locks if there are two independent reasons to inhibit
migration.

There is only one such reason here: preempt_disable() for the purpose
of using the FPSIMD registers, where local_bh_disable() implies this
and also inhibits local softirqs -- which I'm guessing works just
the same with RT.


So maybe there should really be only one local lock as you suggest.

This gives:

static inline void local_fpsimd_context_lock(void)
{
	local_lock(fpsimd_lock);
	local_bh_disable();
}

static inline void local_fpsimd_context_unlock(void)
{
	local_bh_enable();
	local_unlock(fpsimd_lock);
}

kernel_neon_begin(...)
{
	local_lock(fpsimd_lock);
	local_bh_disable();

	/* ... */

	local_bh_enable();
}

kernel_neon_end(...)
{
	/* ... */

	local_unlock(fpsimd_lock);
}

If the local_{,un}lock() gets transmuted to preempt_{dis,en}able()
for !RT, then this seems to map on to what we currently have.

(Still guessing about how RT works here, so a health warning is
implied.)


If you abstract things this way, can you please split the non-RT changes
into a separate patch and Cc me?  That would be a nice cleanup for
mainline rather than just having the bare local_bh_ and preempt_ stuff
everywhere.

> 
> > There is one place where I mess with the FPSIMD context no lock held
> > because of a need to copy_from_user() straight into the context backing
> > store (we can't copy_from_user() with preemption disabled...)
> > I'm not sure whether RT will have any impact on this, but it probably
> > needs thinking about.
> 
> if no locks are held and the task can be preempted then it also might
> happen on PREEMPT config. But copy_from_user() doesn't sounds like
> something that will go straight to HW.

We're just copying to memory, so I guess so long as program order is
respected when the task is migrated (which is ensured via heavy barriers
somewhere in the scheduler and/or arch context switch code IIRC) then I
think there should be no problem.  This is a prerequisite for
preemptive migration to work at all, not just on RT.  So I suppose
there's no new problem here.

> 
> > One more comment below...
> > 
> > > 
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > ---
> > >  arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++--
> > >  1 file changed, 18 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > > index e7226c4c7493..3a5cd1908874 100644
> > > --- a/arch/arm64/kernel/fpsimd.c
> > > +++ b/arch/arm64/kernel/fpsimd.c
> > > @@ -38,6 +38,7 @@
> > >  #include <linux/signal.h>
> > >  #include <linux/slab.h>
> > >  #include <linux/sysctl.h>
> > > +#include <linux/locallock.h>
> > >  
> > >  #include <asm/fpsimd.h>
> > >  #include <asm/cputype.h>
> > > @@ -235,7 +236,7 @@ static void sve_user_enable(void)
> > >   *    whether TIF_SVE is clear or set, since these are not vector length
> > >   *    dependent.
> > >   */
> > > -
> > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > >  /*
> > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > >   *
> > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > >  	 * non-SVE thread.
> > >  	 */
> > >  	if (task == current) {
> > > +		local_lock(fpsimd_lock);
> > >  		local_bh_disable();
> > >  
> > >  		task_fpsimd_save();
> > > @@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
> > >  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> > >  		sve_to_fpsimd(task);
> > >  
> > > -	if (task == current)
> > > +	if (task == current) {
> > > +		local_unlock(fpsimd_lock);
> > 
> > Is this misordered against local_bh_enable(), or doesn't it matter?
> 
> It actually is but it does not matter. On RT local_bh_disable() is
> turned into migrate_disable() which in turn disables the migration of the
> task to another CPU (while it is still preemptible). This
> migrate_disable() is also part of local_lock(). Maybe I will add a
> local_lock_bh() and replace this local_bh_disable() with it so it will
> be better :)

Fair enough.

Cheers
---Dave

> 
> > >  		local_bh_enable();
> > > +	}

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-05-22 17:33         ` Sebastian Andrzej Siewior
@ 2018-07-11 13:25           ` Steven Rostedt
  -1 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-07-11 13:25 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On Tue, 22 May 2018 19:33:33 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-05-22 13:24:29 [-0400], Steven Rostedt wrote:
> > On Tue, 22 May 2018 19:21:16 +0200
> > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >   
> > > On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:  
> > > > On Thu, 17 May 2018 14:40:06 +0200
> > > > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > > >     
> > > > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > > > >  /*
> > > > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > > > >   *
> > > > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > > > >  	 * non-SVE thread.
> > > > >  	 */
> > > > >  	if (task == current) {
> > > > > +		local_lock(fpsimd_lock);
> > > > >  		local_bh_disable();    
> > > > 
> > > > I'm surprised that we don't have a "local_lock_bh()"?    
> > > 
> > > right. Like the last time when we introduced a global lock with no
> > > locking context? 
> > >   
> > 
> > I meant, we could have local_lock_bh(fpsimd_lock); that would turn into
> > a local_bh_disable() when !PREEMPT_RT.  
> 
> Oh that part. That could be possible I guess. I need to look into the
> second part which disables preemption while the FPU is taken.
>

Did you decide to create a local_lock_bh(lock) function? I don't see it.

And should this be backported to 4.14-rt too? You state you saw this in
4.16-rt, but did you start doing something different then, or did the
kernel change?

Thanks!

-- Steve

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-11 13:25           ` Steven Rostedt
  0 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-07-11 13:25 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 22 May 2018 19:33:33 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-05-22 13:24:29 [-0400], Steven Rostedt wrote:
> > On Tue, 22 May 2018 19:21:16 +0200
> > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >   
> > > On 2018-05-22 13:10:04 [-0400], Steven Rostedt wrote:  
> > > > On Thu, 17 May 2018 14:40:06 +0200
> > > > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > > >     
> > > > > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> > > > >  /*
> > > > >   * Update current's FPSIMD/SVE registers from thread_struct.
> > > > >   *
> > > > > @@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
> > > > >  	 * non-SVE thread.
> > > > >  	 */
> > > > >  	if (task == current) {
> > > > > +		local_lock(fpsimd_lock);
> > > > >  		local_bh_disable();    
> > > > 
> > > > I'm surprised that we don't have a "local_lock_bh()"?    
> > > 
> > > right. Like the last time when we introduced a global lock with no
> > > locking context? 
> > >   
> > 
> > I meant, we could have local_lock_bh(fpsimd_lock); that would turn into
> > a local_bh_disable() when !PREEMPT_RT.  
> 
> Oh that part. That could be possible I guess. I need to look into the
> second part which disables preemption while the FPU is taken.
>

Did you decide to create a local_lock_bh(lock) function? I don't see it.

And should this be backported to 4.14-rt too? You state you saw this in
4.16-rt, but did you start doing something different then, or did the
kernel change?

Thanks!

-- Steve

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-11 13:25           ` Steven Rostedt
@ 2018-07-11 13:31             ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-11 13:31 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On 2018-07-11 09:25:55 [-0400], Steven Rostedt wrote:
> Did you decide to create a local_lock_bh(lock) function? I don't see it.
> 
> And should this be backported to 4.14-rt too? You state you saw this in
> 4.16-rt, but did you start doing something different then, or did the
> kernel change?

I wanted to re-evaluate the whole situation here but didn't get to it
yet.

> Thanks!
> 
> -- Steve

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-11 13:31             ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-11 13:31 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-11 09:25:55 [-0400], Steven Rostedt wrote:
> Did you decide to create a local_lock_bh(lock) function? I don't see it.
> 
> And should this be backported to 4.14-rt too? You state you saw this in
> 4.16-rt, but did you start doing something different then, or did the
> kernel change?

I wanted to re-evaluate the whole situation here but didn't get to it
yet.

> Thanks!
> 
> -- Steve

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-11 13:31             ` Sebastian Andrzej Siewior
@ 2018-07-11 13:33               ` Steven Rostedt
  -1 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-07-11 13:33 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On Wed, 11 Jul 2018 15:31:57 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-07-11 09:25:55 [-0400], Steven Rostedt wrote:
> > Did you decide to create a local_lock_bh(lock) function? I don't see it.
> > 
> > And should this be backported to 4.14-rt too? You state you saw this in
> > 4.16-rt, but did you start doing something different then, or did the
> > kernel change?  
> 
> I wanted to re-evaluate the whole situation here but didn't get to it
> yet.

Thanks, I won't add this then.

-- Steve

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-11 13:33               ` Steven Rostedt
  0 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-07-11 13:33 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 11 Jul 2018 15:31:57 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-07-11 09:25:55 [-0400], Steven Rostedt wrote:
> > Did you decide to create a local_lock_bh(lock) function? I don't see it.
> > 
> > And should this be backported to 4.14-rt too? You state you saw this in
> > 4.16-rt, but did you start doing something different then, or did the
> > kernel change?  
> 
> I wanted to re-evaluate the whole situation here but didn't get to it
> yet.

Thanks, I won't add this then.

-- Steve

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-11 13:31             ` Sebastian Andrzej Siewior
  (?)
  (?)
@ 2018-07-11 17:07             ` Mike Galbraith
  -1 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-11 17:07 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-rt-users

On Wed, 2018-07-11 at 15:31 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-11 09:25:55 [-0400], Steven Rostedt wrote:
> > Did you decide to create a local_lock_bh(lock) function? I don't see it.
> > 
> > And should this be backported to 4.14-rt too? You state you saw this in
> > 4.16-rt, but did you start doing something different then, or did the
> > kernel change?
> 
> I wanted to re-evaluate the whole situation here but didn't get to it
> yet.

Ignore the rfc I sent then.  New toy works well enough meanwhile.

(cavium boxen are a LOT less dinky/cute than I'm used to thinking of
arm gizmos being.. 224 core box boogies right along)

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-11 13:33               ` Steven Rostedt
@ 2018-07-13 17:49                 ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-13 17:49 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel, Mike Galbraith

In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but stays on the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.
Add a locallock around this process. This avoids that any function
within the locallock block is invoked more than once on the same CPU.

The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
state of registers used for in-kernel-work. We would require additional storage
for the in-kernel copy of the registers. But then the NEON-crypto checks for
the need-resched flag so it shouldn't be that bad.
The preempt_disable() avoids the context switch while the kernel uses the SIMD
registers. Unfortunately we have to balance out the migrate_disable() counter
because local_lock_bh() is invoked in different context compared to its unlock
counterpart.

__efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
preempt_disable() context and instead save the registers always in its
extra spot on RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---

This seems to work (crypto chacha20-neon + cyclictest). I have no
EFI so I have no clue if saving SIMD while calling to EFI works.

 arch/arm64/kernel/fpsimd.c |   47 ++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -38,6 +38,7 @@
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/locallock.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
@@ -235,7 +236,7 @@ static void sve_user_enable(void)
  *    whether TIF_SVE is clear or set, since these are not vector length
  *    dependent.
  */
-
+static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
 /*
  * Update current's FPSIMD/SVE registers from thread_struct.
  *
@@ -594,7 +595,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
-		local_bh_disable();
+		local_lock_bh(fpsimd_lock);
 
 		task_fpsimd_save();
 		set_thread_flag(TIF_FOREIGN_FPSTATE);
@@ -605,7 +606,7 @@ int sve_set_vector_length(struct task_st
 		sve_to_fpsimd(task);
 
 	if (task == current)
-		local_bh_enable();
+		local_unlock_bh(fpsimd_lock);
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,7 +838,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	task_fpsimd_save();
 	fpsimd_to_sve(current);
@@ -849,7 +850,7 @@ asmlinkage void do_sve_acc(unsigned int
 	if (test_and_set_thread_flag(TIF_SVE))
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -925,7 +926,7 @@ void fpsimd_flush_thread(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
@@ -967,7 +968,7 @@ void fpsimd_flush_thread(void)
 
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -979,9 +980,9 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 	task_fpsimd_save();
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -1021,14 +1022,14 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		task_fpsimd_load();
 		fpsimd_bind_to_cpu();
 	}
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -1041,7 +1042,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1052,7 +1053,7 @@ void fpsimd_update_current_state(struct
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_bind_to_cpu();
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	__this_cpu_write(kernel_neon_busy, true);
 
@@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
 	fpsimd_flush_cpu_state();
 
 	preempt_disable();
-
-	local_bh_enable();
+	/*
+	 * balance atomic vs !atomic context of migrate_disable().
+	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
+	 */
+	migrate_disable();
+	migrate_disable();
+	migrate_disable();
+	local_unlock_bh(fpsimd_lock);
 }
 EXPORT_SYMBOL(kernel_neon_begin);
 
@@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
 	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
 
 	preempt_enable();
+	/* balance migrate_disable(). See kernel_neon_begin() */
+	migrate_enable();
+	migrate_enable();
+	migrate_enable();
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
@@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	WARN_ON(preemptible());
-
-	if (may_use_simd()) {
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
 		kernel_neon_begin();
 	} else {
 		/*

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-13 17:49                 ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-13 17:49 UTC (permalink / raw)
  To: linux-arm-kernel

In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but stays on the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.
Add a locallock around this process. This avoids that any function
within the locallock block is invoked more than once on the same CPU.

The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
state of registers used for in-kernel-work. We would require additional storage
for the in-kernel copy of the registers. But then the NEON-crypto checks for
the need-resched flag so it shouldn't be that bad.
The preempt_disable() avoids the context switch while the kernel uses the SIMD
registers. Unfortunately we have to balance out the migrate_disable() counter
because local_lock_bh() is invoked in different context compared to its unlock
counterpart.

__efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
preempt_disable() context and instead save the registers always in its
extra spot on RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---

This seems to work (crypto chacha20-neon + cyclictest). I have no
EFI so I have no clue if saving SIMD while calling to EFI works.

 arch/arm64/kernel/fpsimd.c |   47 ++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -38,6 +38,7 @@
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/locallock.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
@@ -235,7 +236,7 @@ static void sve_user_enable(void)
  *    whether TIF_SVE is clear or set, since these are not vector length
  *    dependent.
  */
-
+static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
 /*
  * Update current's FPSIMD/SVE registers from thread_struct.
  *
@@ -594,7 +595,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
-		local_bh_disable();
+		local_lock_bh(fpsimd_lock);
 
 		task_fpsimd_save();
 		set_thread_flag(TIF_FOREIGN_FPSTATE);
@@ -605,7 +606,7 @@ int sve_set_vector_length(struct task_st
 		sve_to_fpsimd(task);
 
 	if (task == current)
-		local_bh_enable();
+		local_unlock_bh(fpsimd_lock);
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,7 +838,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	task_fpsimd_save();
 	fpsimd_to_sve(current);
@@ -849,7 +850,7 @@ asmlinkage void do_sve_acc(unsigned int
 	if (test_and_set_thread_flag(TIF_SVE))
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -925,7 +926,7 @@ void fpsimd_flush_thread(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
@@ -967,7 +968,7 @@ void fpsimd_flush_thread(void)
 
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -979,9 +980,9 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 	task_fpsimd_save();
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -1021,14 +1022,14 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		task_fpsimd_load();
 		fpsimd_bind_to_cpu();
 	}
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -1041,7 +1042,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1052,7 +1053,7 @@ void fpsimd_update_current_state(struct
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_bind_to_cpu();
 
-	local_bh_enable();
+	local_unlock_bh(fpsimd_lock);
 }
 
 /*
@@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
-	local_bh_disable();
+	local_lock_bh(fpsimd_lock);
 
 	__this_cpu_write(kernel_neon_busy, true);
 
@@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
 	fpsimd_flush_cpu_state();
 
 	preempt_disable();
-
-	local_bh_enable();
+	/*
+	 * balance atomic vs !atomic context of migrate_disable().
+	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
+	 */
+	migrate_disable();
+	migrate_disable();
+	migrate_disable();
+	local_unlock_bh(fpsimd_lock);
 }
 EXPORT_SYMBOL(kernel_neon_begin);
 
@@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
 	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
 
 	preempt_enable();
+	/* balance migrate_disable(). See kernel_neon_begin() */
+	migrate_enable();
+	migrate_enable();
+	migrate_enable();
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
@@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	WARN_ON(preemptible());
-
-	if (may_use_simd()) {
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
 		kernel_neon_begin();
 	} else {
 		/*

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT] locallock: add local_lock_bh()
  2018-07-13 17:49                 ` Sebastian Andrzej Siewior
@ 2018-07-13 17:50                   ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-13 17:50 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel, Mike Galbraith

For the ARM64 simd locking it would be easier to have local_lock_bh()
which grabs a local_lock with BH disabled and turns into a
local_bh_disable() on !RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
obviously required by the previous one…

 include/linux/locallock.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/include/linux/locallock.h b/include/linux/locallock.h
index 921eab83cd34..15aa0dea2bfb 100644
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -47,9 +47,23 @@ static inline void __local_lock(struct local_irq_lock *lv)
 	lv->nestcnt++;
 }
 
+static inline void __local_lock_bh(struct local_irq_lock *lv)
+{
+	if (lv->owner != current) {
+		spin_lock_bh(&lv->lock);
+		LL_WARN(lv->owner);
+		LL_WARN(lv->nestcnt);
+		lv->owner = current;
+	}
+	lv->nestcnt++;
+}
+
 #define local_lock(lvar)					\
 	do { __local_lock(&get_local_var(lvar)); } while (0)
 
+#define local_lock_bh(lvar)					\
+	do { __local_lock_bh(&get_local_var(lvar)); } while (0)
+
 #define local_lock_on(lvar, cpu)				\
 	do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
 
@@ -88,12 +102,29 @@ static inline void __local_unlock(struct local_irq_lock *lv)
 	spin_unlock(&lv->lock);
 }
 
+static inline void __local_unlock_bh(struct local_irq_lock *lv)
+{
+	LL_WARN(lv->nestcnt == 0);
+	LL_WARN(lv->owner != current);
+	if (--lv->nestcnt)
+		return;
+
+	lv->owner = NULL;
+	spin_unlock_bh(&lv->lock);
+}
+
 #define local_unlock(lvar)					\
 	do {							\
 		__local_unlock(this_cpu_ptr(&lvar));		\
 		put_local_var(lvar);				\
 	} while (0)
 
+#define local_unlock_bh(lvar)					\
+	do {							\
+		__local_unlock_bh(this_cpu_ptr(&lvar));		\
+		put_local_var(lvar);				\
+	} while (0)
+
 #define local_unlock_on(lvar, cpu)                       \
 	do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
 
@@ -253,6 +284,8 @@ static inline void local_irq_lock_init(int lvar) { }
 
 #define local_lock(lvar)			preempt_disable()
 #define local_unlock(lvar)			preempt_enable()
+#define local_lock_bh(lvar)			local_bh_disable()
+#define local_unlock_bh(lvar)			local_bh_enable()
 #define local_lock_irq(lvar)			local_irq_disable()
 #define local_lock_irq_on(lvar, cpu)		local_irq_disable()
 #define local_unlock_irq(lvar)			local_irq_enable()
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 69+ messages in thread

* [PATCH RT] locallock: add local_lock_bh()
@ 2018-07-13 17:50                   ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-13 17:50 UTC (permalink / raw)
  To: linux-arm-kernel

For the ARM64 simd locking it would be easier to have local_lock_bh()
which grabs a local_lock with BH disabled and turns into a
local_bh_disable() on !RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
obviously required by the previous one?

 include/linux/locallock.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/include/linux/locallock.h b/include/linux/locallock.h
index 921eab83cd34..15aa0dea2bfb 100644
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -47,9 +47,23 @@ static inline void __local_lock(struct local_irq_lock *lv)
 	lv->nestcnt++;
 }
 
+static inline void __local_lock_bh(struct local_irq_lock *lv)
+{
+	if (lv->owner != current) {
+		spin_lock_bh(&lv->lock);
+		LL_WARN(lv->owner);
+		LL_WARN(lv->nestcnt);
+		lv->owner = current;
+	}
+	lv->nestcnt++;
+}
+
 #define local_lock(lvar)					\
 	do { __local_lock(&get_local_var(lvar)); } while (0)
 
+#define local_lock_bh(lvar)					\
+	do { __local_lock_bh(&get_local_var(lvar)); } while (0)
+
 #define local_lock_on(lvar, cpu)				\
 	do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
 
@@ -88,12 +102,29 @@ static inline void __local_unlock(struct local_irq_lock *lv)
 	spin_unlock(&lv->lock);
 }
 
+static inline void __local_unlock_bh(struct local_irq_lock *lv)
+{
+	LL_WARN(lv->nestcnt == 0);
+	LL_WARN(lv->owner != current);
+	if (--lv->nestcnt)
+		return;
+
+	lv->owner = NULL;
+	spin_unlock_bh(&lv->lock);
+}
+
 #define local_unlock(lvar)					\
 	do {							\
 		__local_unlock(this_cpu_ptr(&lvar));		\
 		put_local_var(lvar);				\
 	} while (0)
 
+#define local_unlock_bh(lvar)					\
+	do {							\
+		__local_unlock_bh(this_cpu_ptr(&lvar));		\
+		put_local_var(lvar);				\
+	} while (0)
+
 #define local_unlock_on(lvar, cpu)                       \
 	do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
 
@@ -253,6 +284,8 @@ static inline void local_irq_lock_init(int lvar) { }
 
 #define local_lock(lvar)			preempt_disable()
 #define local_unlock(lvar)			preempt_enable()
+#define local_lock_bh(lvar)			local_bh_disable()
+#define local_unlock_bh(lvar)			local_bh_enable()
 #define local_lock_irq(lvar)			local_irq_disable()
 #define local_lock_irq_on(lvar, cpu)		local_irq_disable()
 #define local_unlock_irq(lvar)			local_irq_enable()
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-13 17:49                 ` Sebastian Andrzej Siewior
@ 2018-07-13 22:03                   ` Mike Galbraith
  -1 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-13 22:03 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains the same CPU. This may corrupt the
> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> Add a locallock around this process. This avoids that the any function
> within the locallock block is invoked more than once on the same CPU.
> 
> The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> state of registers used for in-kernel-work. We would require additional storage
> for the in-kernel copy of the registers. But then the NEON-crypto checks for
> the need-resched flag so it shouldn't that bad.
> The preempt_disable() avoids the context switch while the kernel uses the SIMD
> registers. Unfortunately we have to balance out the migrate_disable() counter
> because local_lock_bh() is invoked in different context compared to its unlock
> counterpart.
> 
> __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> preempt_disable() context and instead save the registers always in its
> extra spot on RT.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> 
> This seems to make work (crypto chacha20-neon + cyclictest). I have no
> EFI so I have no clue if saving SIMD while calling to EFI works.

All is not well on cavium test box.  I'm seeing random errors ala...

./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023

...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
as well, which is unsurprising if it's related to fpsimd woes.  Box
does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.

To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
the problem. (relevant? dunno, it may be unrelated to fpsimd.c).

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-13 22:03                   ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-13 22:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains on the same CPU. This may corrupt the
> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> Add a locallock around this process. This ensures that no function
> within the locallock block is invoked more than once on the same CPU.
> 
> The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> state of registers used for in-kernel-work. We would require additional storage
> for the in-kernel copy of the registers. But then the NEON-crypto checks for
> the need-resched flag so it shouldn't be that bad.
> The preempt_disable() avoids the context switch while the kernel uses the SIMD
> registers. Unfortunately we have to balance out the migrate_disable() counter
> because local_lock_bh() is invoked in different context compared to its unlock
> counterpart.
> 
> __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> preempt_disable() context and instead save the registers always in its
> extra spot on RT.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> 
> This seems to make it work (crypto chacha20-neon + cyclictest). I have no
> EFI so I have no clue if saving SIMD while calling to EFI works.

All is not well on cavium test box.  I'm seeing random errors ala...

./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023

...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
as well, which is unsurprising if it's related to fpsimd woes.  Box
does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.

To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
the problem. (relevant? dunno, it may be unrelated to fpsimd.c).

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-13 22:03                   ` Mike Galbraith
  (?)
@ 2018-07-15  7:22                     ` Mike Galbraith
  -1 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-15  7:22 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

On Sat, 2018-07-14 at 00:03 +0200, Mike Galbraith wrote:
> On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains the same CPU. This may corrupt the
> > content of the SIMD registers if the task is preempted during
> > saving/restoring those registers.
> > Add a locallock around this process. This avoids that the any function
> > within the locallock block is invoked more than once on the same CPU.
> > 
> > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > state of registers used for in-kernel-work. We would require additional storage
> > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > the need-resched flag so it shouldn't that bad.
> > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > registers. Unfortunately we have to balance out the migrate_disable() counter
> > because local_lock_bh() is invoked in different context compared to its unlock
> > counterpart.
> > 
> > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> > preempt_disable() context and instead save the registers always in its
> > extra spot on RT.
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> > 
> > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > EFI so I have no clue if saving SIMD while calling to EFI works.
> 
> All is not well on cavium test box.  I'm seeing random errors ala...
> 
> ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> 
> ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> as well, which is unsurprising if it's related to fpsimd woes.  Box
> does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.

Verified to be SIMD woes.  I backported your V2 to 4.14-rt, and the
CPUS*2 kbuild still reliably reproduced the corruption issue.  I then
did the below to both 4.14-rt and 4.16-rt, and the corruption is gone.

(this looks a bit like a patch, but is actually a functional yellow
sticky should I need to come back for another poke at it later)

arm64: fpsimd: disable preemption for RT where that is assumed

1. Per Sebastian's analysis, kernel_neon_begin() can't be made preemptible:
If the task-switch notices TIF_FOREIGN_FPSTATE then it would restore task's
SIMD state and we lose the state of registers used for in-kernel-work.  We
would require additional storage for the in-kernel copy of the registers.
But then the NEON-crypto checks for the need-resched flag so it shouldn't
be that bad.

2. arch_efi_call_virt_setup/teardown() encapsulate __efi_fpsimd_begin/end()
in preempt disabled sections via efi_virtmap_load/unload().  That could be
fixed, but... 

3. A local lock solution which left preempt disabled sections 1 & 2 intact
failed, CPUS*2 parallel kbuild reliably reproduced memory corruption.

Given the two non-preemptible sections which could encapsulate something
painful remained intact with the local lock solution, and the fact that
the remaining BH disabled sections are all small, with empirical evidence
at hand that at LEAST one truly does require preemption be disabled,
the best solution for both RT and !RT is to simply disable preemption for
RT where !RT assumes preemption has been disabled.  That adds no cycles
to the !RT case, fewer cycles to the RT case, and requires no (ugly) work
around for the consequences of local_unlock() under preempt_disable().

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 arch/arm64/kernel/fpsimd.c |   18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -594,6 +594,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		preempt_disable_rt();
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +605,10 @@ int sve_set_vector_length(struct task_st
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
 		local_bh_enable();
+		preempt_enable_rt();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	task_fpsimd_save();
@@ -850,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -925,6 +930,7 @@ void fpsimd_flush_thread(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
@@ -968,6 +974,7 @@ void fpsimd_flush_thread(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -979,9 +986,11 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 	task_fpsimd_save();
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1021,6 +1030,7 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1029,6 +1039,7 @@ void fpsimd_restore_current_state(void)
 	}
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1041,6 +1052,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
@@ -1053,6 +1065,7 @@ void fpsimd_update_current_state(struct
 		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1115,6 +1128,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
+	preempt_disable();
 	local_bh_disable();
 
 	__this_cpu_write(kernel_neon_busy, true);
@@ -1128,8 +1142,6 @@ void kernel_neon_begin(void)
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
 
-	preempt_disable();
-
 	local_bh_enable();
 }
 EXPORT_SYMBOL(kernel_neon_begin);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-15  7:22                     ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-15  7:22 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Steven Rostedt
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel, tglx,
	linux-arm-kernel

On Sat, 2018-07-14 at 00:03 +0200, Mike Galbraith wrote:
> On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains the same CPU. This may corrupt the
> > content of the SIMD registers if the task is preempted during
> > saving/restoring those registers.
> > Add a locallock around this process. This avoids that the any function
> > within the locallock block is invoked more than once on the same CPU.
> > 
> > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > state of registers used for in-kernel-work. We would require additional storage
> > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > the need-resched flag so it shouldn't that bad.
> > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > registers. Unfortunately we have to balance out the migrate_disable() counter
> > because local_lock_bh() is invoked in different context compared to its unlock
> > counterpart.
> > 
> > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> > preempt_disable() context and instead save the registers always in its
> > extra spot on RT.
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> > 
> > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > EFI so I have no clue if saving SIMD while calling to EFI works.
> 
> All is not well on cavium test box.  I'm seeing random errors ala...
> 
> ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> 
> ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> as well, which is unsurprising if it's related to fpsimd woes.  Box
> does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.

Verified to be SIMD woes.  I backported your V2 to 4.14-rt, and the
CPUS*2 kbuild still reliably reproduced the corruption issue.  I then
did the below to both 4.14-rt and 4.16-rt, and the corruption is gone.

(this looks a bit like a patch, but is actually a functional yellow
sticky should I need to come back for another poke at it later)

arm64: fpsimd: disable preemption for RT where that is assumed

1. Per Sebastian's analysis, kernel_neon_begin() can't be made preemptible:
If the task-switch notices TIF_FOREIGN_FPSTATE then it would restore task's
SIMD state and we lose the state of registers used for in-kernel-work.  We
would require additional storage for the in-kernel copy of the registers.
But then the NEON-crypto checks for the need-resched flag so it shouldn't
be that bad.

2. arch_efi_call_virt_setup/teardown() encapsulate __efi_fpsimd_begin/end()
in preempt disabled sections via efi_virtmap_load/unload().  That could be
fixed, but... 

3. A local lock solution which left preempt disabled sections 1 & 2 intact
failed, CPUS*2 parallel kbuild reliably reproduced memory corruption.

Given the two non-preemptible sections which could encapsulate something
painful remained intact with the local lock solution, and the fact that
the remaining BH disabled sections are all small, with empirical evidence
at hand that at LEAST one truly does require preemption be disabled,
the best solution for both RT and !RT is to simply disable preemption for
RT where !RT assumes preemption has been disabled.  That adds no cycles
to the !RT case, fewer cycles to the RT case, and requires no (ugly) work
around for the consequences of local_unlock() under preempt_disable().

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 arch/arm64/kernel/fpsimd.c |   18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -594,6 +594,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		preempt_disable_rt();
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +605,10 @@ int sve_set_vector_length(struct task_st
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
 		local_bh_enable();
+		preempt_enable_rt();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	task_fpsimd_save();
@@ -850,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -925,6 +930,7 @@ void fpsimd_flush_thread(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
@@ -968,6 +974,7 @@ void fpsimd_flush_thread(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -979,9 +986,11 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 	task_fpsimd_save();
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1021,6 +1030,7 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1029,6 +1039,7 @@ void fpsimd_restore_current_state(void)
 	}
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1041,6 +1052,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
@@ -1053,6 +1065,7 @@ void fpsimd_update_current_state(struct
 		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1115,6 +1128,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
+	preempt_disable();
 	local_bh_disable();
 
 	__this_cpu_write(kernel_neon_busy, true);
@@ -1128,8 +1142,6 @@ void kernel_neon_begin(void)
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
 
-	preempt_disable();
-
 	local_bh_enable();
 }
 EXPORT_SYMBOL(kernel_neon_begin);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-15  7:22                     ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-15  7:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, 2018-07-14 at 00:03 +0200, Mike Galbraith wrote:
> On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains the same CPU. This may corrupt the
> > content of the SIMD registers if the task is preempted during
> > saving/restoring those registers.
> > Add a locallock around this process. This avoids that the any function
> > within the locallock block is invoked more than once on the same CPU.
> > 
> > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > state of registers used for in-kernel-work. We would require additional storage
> > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > the need-resched flag so it shouldn't that bad.
> > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > registers. Unfortunately we have to balance out the migrate_disable() counter
> > because local_lock_bh() is invoked in different context compared to its unlock
> > counterpart.
> > 
> > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> > preempt_disable() context and instead save the registers always in its
> > extra spot on RT.
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> > 
> > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > EFI so I have no clue if saving SIMD while calling to EFI works.
> 
> All is not well on cavium test box.  I'm seeing random errors ala...
> 
> ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> 
> ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> as well, which is unsurprising if it's related to fpsimd woes.  Box
> does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.

Verified to be SIMD woes.  I backported your V2 to 4.14-rt, and the
CPUS*2 kbuild still reliably reproduced the corruption issue.  I then
did the below to both 4.14-rt and 4.16-rt, and the corruption is gone.

(this looks a bit like a patch, but is actually a functional yellow
sticky should I need to come back for another poke at it later)

arm64: fpsimd: disable preemption for RT where that is assumed

1. Per Sebastian's analysis, kernel_neon_begin() can't be made preemptible:
If the task-switch notices TIF_FOREIGN_FPSTATE then it would restore task's
SIMD state and we lose the state of registers used for in-kernel-work.  We
would require additional storage for the in-kernel copy of the registers.
But then the NEON-crypto checks for the need-resched flag so it shouldn't
be that bad.

2. arch_efi_call_virt_setup/teardown() encapsulate __efi_fpsimd_begin/end()
in preempt disabled sections via efi_virtmap_load/unload().  That could be
fixed, but... 

3. A local lock solution which left preempt disabled sections 1 & 2 intact
failed, CPUS*2 parallel kbuild reliably reproduced memory corruption.

Given the two non-preemptible sections which could encapsulate something
painful remained intact with the local lock solution, and the fact that
the remaining BH disabled sections are all small, with empirical evidence
at hand that at LEAST one truly does require preemption be disabled,
the best solution for both RT and !RT is to simply disable preemption for
RT where !RT assumes preemption has been disabled.  That adds no cycles
to the !RT case, fewer cycles to the RT case, and requires no (ugly) work
around for the consequences of local_unlock() under preempt_disable().

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 arch/arm64/kernel/fpsimd.c |   18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -594,6 +594,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		preempt_disable_rt();
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +605,10 @@ int sve_set_vector_length(struct task_st
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
 		local_bh_enable();
+		preempt_enable_rt();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	task_fpsimd_save();
@@ -850,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -925,6 +930,7 @@ void fpsimd_flush_thread(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
@@ -968,6 +974,7 @@ void fpsimd_flush_thread(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -979,9 +986,11 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 	task_fpsimd_save();
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1021,6 +1030,7 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1029,6 +1039,7 @@ void fpsimd_restore_current_state(void)
 	}
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1041,6 +1052,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable_rt();
 	local_bh_disable();
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
@@ -1053,6 +1065,7 @@ void fpsimd_update_current_state(struct
 		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
+	preempt_enable_rt();
 }
 
 /*
@@ -1115,6 +1128,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
+	preempt_disable();
 	local_bh_disable();
 
 	__this_cpu_write(kernel_neon_busy, true);
@@ -1128,8 +1142,6 @@ void kernel_neon_begin(void)
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
 
-	preempt_disable();
-
 	local_bh_enable();
 }
 EXPORT_SYMBOL(kernel_neon_begin);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-13 17:49                 ` Sebastian Andrzej Siewior
@ 2018-07-16 15:17                   ` Dave Martin
  -1 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-07-16 15:17 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Steven Rostedt, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On Fri, Jul 13, 2018 at 07:49:38PM +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains on the same CPU. This may corrupt the
> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> Add a locallock around this process. This ensures that no function
> within the locallock block is invoked more than once on the same CPU.

The overall shape of this looks reasonable -- I have some comments and
concerns though, below.

I'm to blame for much of the current shape of this code, so please Cc
me on reposts.

> The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> state of registers used for in-kernel-work. We would require additional storage

Are you trying to say that a kernel_neon_begin()...kernel_neon_end()
critical section can't be preemptible?

Whether kernel_neon_begin() itself is preemptible is more of an
implementation detail.

> for the in-kernel copy of the registers. But then the NEON-crypto checks for
> the need-resched flag so it shouldn't be that bad.

This sounds like an (understandably) confused description of how the
existing code is intended to work, rather than what the patch does.

Can we say something along the lines of:

"kernel_neon_begin() ... kernel_neon_end() are intended to delimit a
critical section where the executing context has exclusive access to
the cpu's FPSIMD/SVE registers.  This means that we must not allow
preemption by another context or migration to another cpu during this
region [...]"

(Not trying to be pedantic here: this -rt patch should help to clarify
the intended semantics of the code here beyond what is currently
explicit.  What I truly intended is less clear than I'd like... even to
me.)

> The preempt_disable() avoids the context switch while the kernel uses the SIMD
> registers. Unfortunately we have to balance out the migrate_disable() counter
> because local_lock_bh() is invoked in different context compared to its unlock
> counterpart.
> 
> __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its

Confused -- is this a typo for "kernel_neon_begin()"?

> preempt_disable() context and instead save the registers always in its
> extra spot on RT.

I'm not sure this works.  Some EFI runtime service calls are interruptible,
so we arrange here to stash off the state as normal if an EFI call is
made from an interruptible context; otherwise we stash in a dedicated
percpu side buffer and restore eagerly in __efi_fpsimd_end().

If we now unconditionally use the side-buffer with
IS_ENABLED(CONFIG_PREEMPT_RT_BASE), I think a nested EFI call will save
state over the outer context's saved state, leading to corruption of the
vector registers.

TBH, I think EFI calls need to be non-preemptible and non-migratable.
I doubt anything else will conform to the EFI spec.

IIRC EFI runtime services are not specified to be in any way real-time,
but I don't think there's a lot we can do about that.

> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> 
> This seems to make it work (crypto chacha20-neon + cyclictest). I have no
> EFI so I have no clue if saving SIMD while calling to EFI works.
> 
>  arch/arm64/kernel/fpsimd.c |   47 ++++++++++++++++++++++++++-------------------
>  1 file changed, 28 insertions(+), 19 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -38,6 +38,7 @@
>  #include <linux/signal.h>
>  #include <linux/slab.h>
>  #include <linux/sysctl.h>
> +#include <linux/locallock.h>
>  
>  #include <asm/fpsimd.h>
>  #include <asm/cputype.h>
> @@ -235,7 +236,7 @@ static void sve_user_enable(void)
>   *    whether TIF_SVE is clear or set, since these are not vector length
>   *    dependent.
>   */
> -
> +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);

Does this really disable IRQs, and if so, why?

If so, this creates an IRQ blackout around task_fpsimd_save(), which is
something we explicitly tried to avoid in the original design.  It can
be costly.

Also, this belongs alongside the declaration for fpsimd_last_state,
since that's (part of) what the lock protects.

>  /*
>   * Update current's FPSIMD/SVE registers from thread_struct.
>   *
> @@ -594,7 +595,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> -		local_bh_disable();
> +		local_lock_bh(fpsimd_lock);
>  
>  		task_fpsimd_save();
>  		set_thread_flag(TIF_FOREIGN_FPSTATE);
> @@ -605,7 +606,7 @@ int sve_set_vector_length(struct task_st
>  		sve_to_fpsimd(task);
>  
>  	if (task == current)
> -		local_bh_enable();
> +		local_unlock_bh(fpsimd_lock);
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,7 +838,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	task_fpsimd_save();
>  	fpsimd_to_sve(current);
> @@ -849,7 +850,7 @@ asmlinkage void do_sve_acc(unsigned int
>  	if (test_and_set_thread_flag(TIF_SVE))
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -925,7 +926,7 @@ void fpsimd_flush_thread(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
>  	fpsimd_flush_task_state(current);
> @@ -967,7 +968,7 @@ void fpsimd_flush_thread(void)
>  
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -979,9 +980,9 @@ void fpsimd_preserve_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  	task_fpsimd_save();
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -1021,14 +1022,14 @@ void fpsimd_restore_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>  		task_fpsimd_load();
>  		fpsimd_bind_to_cpu();
>  	}
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -1041,7 +1042,7 @@ void fpsimd_update_current_state(struct
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
>  	if (system_supports_sve() && test_thread_flag(TIF_SVE))
> @@ -1052,7 +1053,7 @@ void fpsimd_update_current_state(struct
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
>  		fpsimd_bind_to_cpu();
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	__this_cpu_write(kernel_neon_busy, true);
>  
> @@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
>  	fpsimd_flush_cpu_state();
>  
>  	preempt_disable();
> -
> -	local_bh_enable();
> +	/*
> +	 * balance atomic vs !atomic context of migrate_disable().
> +	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
> +	 */
> +	migrate_disable();
> +	migrate_disable();
> +	migrate_disable();
> +	local_unlock_bh(fpsimd_lock);

This looks fragile.

Do we really need to do it 3 times?

Showing my ignorance here, but I'd naively expect that the migrate-
disableness changes as follows:

	/* 0 */
	local_lock_bh(); /* +3 */

	...

	preempt_disable(); /* +3 */

	migrate_disable(); /* +4 */
	migrate_disable(); /* +5 */
	migrate_disable(); /* +6 */

	local_unlock_bh(); /* +3 */


If so, I don't understand why it's not OK to call migrate_disable()
just once, leaving the count at +1  (?)

I'm making assumptions about the relationship between preempt_disable()
and migrate_disable() here.

>  }
>  EXPORT_SYMBOL(kernel_neon_begin);
>  
> @@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
>  	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
>  
>  	preempt_enable();
> +	/* balance migrate_disable(). See kernel_neon_begin() */
> +	migrate_enable();
> +	migrate_enable();
> +	migrate_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
>  
> @@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	WARN_ON(preemptible());
> -
> -	if (may_use_simd()) {
> +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {

I suspect this is wrong -- see comments on the commit message.

>  		kernel_neon_begin();
>  	} else {

[...]

Cheers
---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-16 15:17                   ` Dave Martin
  0 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-07-16 15:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 13, 2018 at 07:49:38PM +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains the same CPU. This may corrupt the
> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> Add a locallock around this process. This avoids that the any function
> within the locallock block is invoked more than once on the same CPU.

The overall shape of this looks reasonable -- I have some comments and
concerns though, below.

I'm to blame for much of the current shape of this code, so please Cc
me on reposts.

> The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> state of registers used for in-kernel-work. We would require additional storage

Are you trying to say that a kernel_neon_begin()...kernel_neon_end()
critical section can't be preemptible?

Whether kernel_neon_begin() itself is preemptible is more of an
implementation detail.

> for the in-kernel copy of the registers. But then the NEON-crypto checks for
> the need-resched flag so it shouldn't that bad.

This sounds like an (understandably) confused description of how the
existing code is intended to work, rather than what the patch does.

Can we say something along the lines of:

"kernel_neon_begin() ... kernel_neon_end() are intended to delimit a
critical section where the executing context has exclusive access to
the cpu's FPSIMD/SVE registers.  This means that we must not allow
preemption by another context or migration to another cpu during this
region [...]"

(Not trying to be pedantic here: this -rt patch should help to clarify
the intended semantics of the code here beyond what is currently
explicit.  What I truly intended is less clear than I'd like... even to
me.)

> The preempt_disable() avoids the context switch while the kernel uses the SIMD
> registers. Unfortunately we have to balance out the migrate_disable() counter
> because local_lock_bh() is invoked in different context compared to its unlock
> counterpart.
> 
> __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its

Confused -- is this a typo for "kernel_neon_begin()"?

> preempt_disable() context and instead save the registers always in its
> extra spot on RT.

I'm not sure this works.  Some EFI runtime service calls are interruptible,
so we arrange here to stash off the state as normal if an EFI call is
made from an interruptible context; otherwise we stash in a dedicated
percpu side buffer and restore eagerly in __efi_fpsimd_end().

If we now unconditionally use the side-buffer with
IS_ENABLED(CONFIG_PREEMPT_RT_BASE), I think a nested EFI call will save
state over the outer context's saved state, leading to corruption of the
vector registers.

TBH, I think EFI calls need to be non-preemptible and non-migratable.
I doubt anything else will conform to the EFI spec.

IIRC EFI runtime services are not specified to be in any way real-time,
but I don't think there's a lot we can do about that.

> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> 
> This seems to make work (crypto chacha20-neon + cyclictest). I have no
> EFI so I have no clue if saving SIMD while calling to EFI works.
> 
>  arch/arm64/kernel/fpsimd.c |   47 ++++++++++++++++++++++++++-------------------
>  1 file changed, 28 insertions(+), 19 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -38,6 +38,7 @@
>  #include <linux/signal.h>
>  #include <linux/slab.h>
>  #include <linux/sysctl.h>
> +#include <linux/locallock.h>
>  
>  #include <asm/fpsimd.h>
>  #include <asm/cputype.h>
> @@ -235,7 +236,7 @@ static void sve_user_enable(void)
>   *    whether TIF_SVE is clear or set, since these are not vector length
>   *    dependent.
>   */
> -
> +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);

Does this really disable IRQs, and if so, why?

If so, this creates an IRQ blackout around task_fpsimd_save(), which is
something we explicitly tried to avoid in the original design.  It can
be costly.

Also, this belongs alongside the declaration for fpsimd_last_state,
since that's (part of) what the lock protects.

>  /*
>   * Update current's FPSIMD/SVE registers from thread_struct.
>   *
> @@ -594,7 +595,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> -		local_bh_disable();
> +		local_lock_bh(fpsimd_lock);
>  
>  		task_fpsimd_save();
>  		set_thread_flag(TIF_FOREIGN_FPSTATE);
> @@ -605,7 +606,7 @@ int sve_set_vector_length(struct task_st
>  		sve_to_fpsimd(task);
>  
>  	if (task == current)
> -		local_bh_enable();
> +		local_unlock_bh(fpsimd_lock);
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,7 +838,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	task_fpsimd_save();
>  	fpsimd_to_sve(current);
> @@ -849,7 +850,7 @@ asmlinkage void do_sve_acc(unsigned int
>  	if (test_and_set_thread_flag(TIF_SVE))
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -925,7 +926,7 @@ void fpsimd_flush_thread(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
>  	fpsimd_flush_task_state(current);
> @@ -967,7 +968,7 @@ void fpsimd_flush_thread(void)
>  
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -979,9 +980,9 @@ void fpsimd_preserve_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  	task_fpsimd_save();
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -1021,14 +1022,14 @@ void fpsimd_restore_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>  		task_fpsimd_load();
>  		fpsimd_bind_to_cpu();
>  	}
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -1041,7 +1042,7 @@ void fpsimd_update_current_state(struct
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
>  	if (system_supports_sve() && test_thread_flag(TIF_SVE))
> @@ -1052,7 +1053,7 @@ void fpsimd_update_current_state(struct
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
>  		fpsimd_bind_to_cpu();
>  
> -	local_bh_enable();
> +	local_unlock_bh(fpsimd_lock);
>  }
>  
>  /*
> @@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> -	local_bh_disable();
> +	local_lock_bh(fpsimd_lock);
>  
>  	__this_cpu_write(kernel_neon_busy, true);
>  
> @@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
>  	fpsimd_flush_cpu_state();
>  
>  	preempt_disable();
> -
> -	local_bh_enable();
> +	/*
> +	 * ballance atomic vs !atomic context of migrate_disable().
> +	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
> +	 */
> +	migrate_disable();
> +	migrate_disable();
> +	migrate_disable();
> +	local_unlock_bh(fpsimd_lock);

This looks fragile.

Do we really need to do it 3 times?

Showing my ignorance here, but I'd naively expect that the migrate-
disableness changes as follows:

	/* 0 */
	local_lock_bh(); /* +3 */

	...

	preempt_disable(); /* +3 */

	migrate_disable(); /* +4 */
	migrate_disable(); /* +5 */
	migrate_disable(); /* +6 */

	local_unlock_bh(); /* +3 */


If so, I don't understand why it's not OK to call migrate_disable()
just once, leaving the count at +1  (?)

I'm making assumptions about the relationship between preempt_disable()
and migrate_disable() here.

>  }
>  EXPORT_SYMBOL(kernel_neon_begin);
>  
> @@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
>  	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
>  
>  	preempt_enable();
> +	/* balance migrate_disable(). See kernel_neon_begin() */
> +	migrate_enable();
> +	migrate_enable();
> +	migrate_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
>  
> @@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> -	WARN_ON(preemptible());
> -
> -	if (may_use_simd()) {
> +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {

I suspect this is wrong -- see comments on the commit message.

>  		kernel_neon_begin();
>  	} else {

[...]

Cheers
---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-16 15:17                   ` Dave Martin
  (?)
@ 2018-07-18  9:12                   ` Sebastian Andrzej Siewior
  2018-07-18  9:24                       ` Sebastian Andrzej Siewior
  2018-07-24 13:46                       ` Steven Rostedt
  -1 siblings, 2 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18  9:12 UTC (permalink / raw)
  To: Dave Martin
  Cc: Steven Rostedt, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On 2018-07-16 16:17:39 [+0100], Dave Martin wrote:
> On Fri, Jul 13, 2018 at 07:49:38PM +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains the same CPU. This may corrupt the
> > content of the SIMD registers if the task is preempted during
> > saving/restoring those registers.
> > Add a locallock around this process. This avoids that the any function
> > within the locallock block is invoked more than once on the same CPU.
> 
> The overall shape of this looks reasonable -- I have some comments and
> concerns though, below.
> 
> I'm to blame for much of the current shape of this code, so please Cc
> me on reposts.
> 
> > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > state of registers used for in-kernel-work. We would require additional storage
> 
> Are you trying to say that a kernel_neon_begin()...kernel_neon_end()
> critical section can't be preemptible?

yes. I try to describe the current status.

> Whether kernel_neon_begin() itself is preemptible is more of an
> implementation detail.
>
> > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > the need-resched flag so it shouldn't that bad.
> 
> This sounds like an (understandably) confused description of how the
> existing code is intended to work, rather than what the patch does.

What I am trying to say is that kernel_neon_begin() invokes
preempt_disable(). Usually we try to avoid the preempt_disable()
sections because this forbids scheduling of the task (and a task with a
higher priority couldn't be scheduled). Sometimes the preempt_disable()
section is replaced with alternatives / locking of the resource but in
this case there is no alternative because we would lose the in-kernel
registers.

> Can we say something along the lines of:
> 
> "kernel_neon_begin() ... kernel_neon_end() are intended to delimit a
> critical section where the executing context has exclusive access to
> the cpu's FPSIMD/SVE registers.  This means that we must not allow
> preemption by another context or migration to another cpu during this
> region [...]"
> 
> (Not trying to be pedantic here: this -rt patch should help to clarify
> the intended semantics of the code here beyond what is currently
> explicit.  What I truly intended is less clear than I'd like... even to
> me.)

I can update that part.

> > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > registers. Unfortunately we have to balance out the migrate_disable() counter
> > because local_lock_bh() is invoked in different context compared to its unlock
> > counterpart.
> > 
> > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> 
> Confused -- is this a typo for "kernel_neon_begin()"?

Yes.

> > preempt_disable() context and instead save the registers always in its
> > extra spot on RT.
> 
> I'm not sure this works.  Some EFI runtime service calls are interruptible,
> so we arrange here to stash off the state as normal if an EFI call is
> made from an interruptible context; otherwise we stash in a dedicated
> percpu side buffer and restore eagerly in __efi_fpsimd_end().
> 
> If we now unconditionally use the side-buffer with
> IS_ENABLED(CONFIG_PREEMPT_RT_BASE), I think a nested EFI call will save
> state over the outer context's saved state, leading to corruption of the
> vector registers.

Okay. So this is news to me. I assumed that once we are "in" EFI the CPU
is gone/occupied and only becomes visible to interrupts/whatever again
once it leaves EFI. The call is also within a local_irq_disable()
section. But your "interruptible" would mean that there is a backdoor.

> TBH, I think EFI calls need to be non-preemptible and non-migratable.
> I doubt anything else will conform to the EFI spec.
> 
> IIRC EFI runtime services are not specified to be in any way real-time,
> but I don't think there's a lot we can do about that.

I have no idea what this EFI thing can do. Based on the callbacks I saw,
it seems that you can use them to change time. It would be bad if the
whole process takes, say, 50us (it may need to talk to i2c/spi after
all) and in that time 

> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> > 
> > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> >  arch/arm64/kernel/fpsimd.c |   47 ++++++++++++++++++++++++++-------------------
> >  1 file changed, 28 insertions(+), 19 deletions(-)
> > 
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -38,6 +38,7 @@
> >  #include <linux/signal.h>
> >  #include <linux/slab.h>
> >  #include <linux/sysctl.h>
> > +#include <linux/locallock.h>
> >  
> >  #include <asm/fpsimd.h>
> >  #include <asm/cputype.h>
> > @@ -235,7 +236,7 @@ static void sve_user_enable(void)
> >   *    whether TIF_SVE is clear or set, since these are not vector length
> >   *    dependent.
> >   */
> > -
> > +static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
> 
> Does this really disable IRQs, and if so, why?

On RT it is a per-CPU sleeping spin_lock. On !RT it depends on the
calling. A local_lock() would be turned into preempt_disable() and
local_lock_irqsave() would be turned local_irq_save().
Here I used local_lock_bh() so the local_bh_disable() remains.

> If so, this creates an IRQ blackout around task_fpsimd_save(), which is
> something we explicitly tried to avoid in the original design.  It can
> be costly.
> 
> Also, this belongs alongside the declaration for fpsimd_last_state,
> since that's (part of) what the lock protects.
> 
> >  /*
> >   * Update current's FPSIMD/SVE registers from thread_struct.
> >   *
> > @@ -594,7 +595,7 @@ int sve_set_vector_length(struct task_st
> >  	 * non-SVE thread.
> >  	 */
> >  	if (task == current) {
> > -		local_bh_disable();
> > +		local_lock_bh(fpsimd_lock);
> >  
> >  		task_fpsimd_save();
> >  		set_thread_flag(TIF_FOREIGN_FPSTATE);
> > @@ -605,7 +606,7 @@ int sve_set_vector_length(struct task_st
> >  		sve_to_fpsimd(task);
> >  
> >  	if (task == current)
> > -		local_bh_enable();
> > +		local_unlock_bh(fpsimd_lock);
> >  
> >  	/*
> >  	 * Force reallocation of task SVE state to the correct size
> > @@ -837,7 +838,7 @@ asmlinkage void do_sve_acc(unsigned int
> >  
> >  	sve_alloc(current);
> >  
> > -	local_bh_disable();
> > +	local_lock_bh(fpsimd_lock);
> >  
> >  	task_fpsimd_save();
> >  	fpsimd_to_sve(current);
> > @@ -849,7 +850,7 @@ asmlinkage void do_sve_acc(unsigned int
> >  	if (test_and_set_thread_flag(TIF_SVE))
> >  		WARN_ON(1); /* SVE access shouldn't have trapped */
> >  
> > -	local_bh_enable();
> > +	local_unlock_bh(fpsimd_lock);
> >  }
> >  
> >  /*
> > @@ -925,7 +926,7 @@ void fpsimd_flush_thread(void)
> >  	if (!system_supports_fpsimd())
> >  		return;
> >  
> > -	local_bh_disable();
> > +	local_lock_bh(fpsimd_lock);
> >  
> >  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
> >  	fpsimd_flush_task_state(current);
> > @@ -967,7 +968,7 @@ void fpsimd_flush_thread(void)
> >  
> >  	set_thread_flag(TIF_FOREIGN_FPSTATE);
> >  
> > -	local_bh_enable();
> > +	local_unlock_bh(fpsimd_lock);
> >  }
> >  
> >  /*
> > @@ -979,9 +980,9 @@ void fpsimd_preserve_current_state(void)
> >  	if (!system_supports_fpsimd())
> >  		return;
> >  
> > -	local_bh_disable();
> > +	local_lock_bh(fpsimd_lock);
> >  	task_fpsimd_save();
> > -	local_bh_enable();
> > +	local_unlock_bh(fpsimd_lock);
> >  }
> >  
> >  /*
> > @@ -1021,14 +1022,14 @@ void fpsimd_restore_current_state(void)
> >  	if (!system_supports_fpsimd())
> >  		return;
> >  
> > -	local_bh_disable();
> > +	local_lock_bh(fpsimd_lock);
> >  
> >  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> >  		task_fpsimd_load();
> >  		fpsimd_bind_to_cpu();
> >  	}
> >  
> > -	local_bh_enable();
> > +	local_unlock_bh(fpsimd_lock);
> >  }
> >  
> >  /*
> > @@ -1041,7 +1042,7 @@ void fpsimd_update_current_state(struct
> >  	if (!system_supports_fpsimd())
> >  		return;
> >  
> > -	local_bh_disable();
> > +	local_lock_bh(fpsimd_lock);
> >  
> >  	current->thread.fpsimd_state.user_fpsimd = *state;
> >  	if (system_supports_sve() && test_thread_flag(TIF_SVE))
> > @@ -1052,7 +1053,7 @@ void fpsimd_update_current_state(struct
> >  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
> >  		fpsimd_bind_to_cpu();
> >  
> > -	local_bh_enable();
> > +	local_unlock_bh(fpsimd_lock);
> >  }
> >  
> >  /*
> > @@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
> >  
> >  	BUG_ON(!may_use_simd());
> >  
> > -	local_bh_disable();
> > +	local_lock_bh(fpsimd_lock);
> >  
> >  	__this_cpu_write(kernel_neon_busy, true);
> >  
> > @@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
> >  	fpsimd_flush_cpu_state();
> >  
> >  	preempt_disable();
> > -
> > -	local_bh_enable();
> > +	/*
> > +	 * ballance atomic vs !atomic context of migrate_disable().
> > +	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
> > +	 */
> > +	migrate_disable();
> > +	migrate_disable();
> > +	migrate_disable();
> > +	local_unlock_bh(fpsimd_lock);
> 
> This looks fragile.
> 
> Do we really need to do it 3 times?

Unfortunately yes.

> Showing my ignorance here, but I'd naively expect that the migrate-
> disableness changes as follows:
> 
> 	/* 0 */
> 	local_lock_bh(); /* +3 */
> 
> 	...
> 
> 	preempt_disable(); /* +3 */
> 
> 	migrate_disable(); /* +4 */
> 	migrate_disable(); /* +5 */
> 	migrate_disable(); /* +6 */
> 
> 	local_unlock_bh(); /* +3 */
> 
> 
> If so, I don't understand why it's not OK to call migrate_disable()
> just once, leaving the count at +1  (?)
> 
> I'm making assumptions about the relationship between preempt_disable()
> and migrate_disable() here.

Exactly. So local_lock_bh() does +3 which I try to explain why it is 3.
Now migrate_disable() has two counters: One for atomic context (irq or
preemption disabled) and one for non-atomic context. The difference is
that in atomic context migrate_disable() does nothing and in !atomic
context it does other things which can't be done in atomic context
anyway (I can explain this in full detail but you may lose interest so I
keep it short). To update your example:

	/* ATOMIC COUNTER | non-ATOMIC COUNTER  | change */
			 /* 0 | 0 | - */
	local_lock_bh(); /* 0 | 3 | N+3 */
 
 	...
 
	preempt_disable(); /* atomic context */
 
	migrate_disable(); /* 1 | 3 | A+1 */
	migrate_disable(); /* 2 | 3 | A+1 */
	migrate_disable(); /* 3 | 3 | A+1 */
 
	local_unlock_bh(); /* 0 | 3 | A-3 */

/* later */
	preempt_enable();  /* non-atomic context */
	migrate_enable();  /* 0 | 2 | N-1 */
	migrate_enable();  /* 0 | 1 | N-1 */
	migrate_enable();  /* 0 | 0 | N-1 */

> >  }
> >  EXPORT_SYMBOL(kernel_neon_begin);
> >  
> > @@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
> >  	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
> >  
> >  	preempt_enable();
> > +	/* balance migrate_disable(). See kernel_neon_begin() */
> > +	migrate_enable();
> > +	migrate_enable();
> > +	migrate_enable();
> >  }
> >  EXPORT_SYMBOL(kernel_neon_end);
> >  
> > @@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
> >  	if (!system_supports_fpsimd())
> >  		return;
> >  
> > -	WARN_ON(preemptible());
> > -
> > -	if (may_use_simd()) {
> > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
> 
> I suspect this is wrong -- see comments on the commit message.
> 
> >  		kernel_neon_begin();
> >  	} else {
> 
> [...]
> 
> Cheers
> ---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-18  9:12                   ` Sebastian Andrzej Siewior
@ 2018-07-18  9:24                       ` Sebastian Andrzej Siewior
  2018-07-24 13:46                       ` Steven Rostedt
  1 sibling, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18  9:24 UTC (permalink / raw)
  To: Dave Martin
  Cc: Steven Rostedt, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On 2018-07-18 11:12:10 [+0200], To Dave Martin wrote:
> > > -	if (may_use_simd()) {
> > > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
> > 
> > I suspect this is wrong -- see comments on the commit message.

I'm sorry, I pressed send too early, I was aiming for the draft folder.
So yes, based on that EFI that might be interruptible, let me try to
look at the initial issue again and maybe I get another idea how to deal
with this.
One question: If EFI is interruptible that means, we call into EFI - how
do we get out? Does EFI enable interrupts and the kernel receives an
interrupt and treats this EFI call like a regular context switch?

> > >  		kernel_neon_begin();
> > >  	} else {
> > 
> > [...]
> > 
> > Cheers
> > ---Dave

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18  9:24                       ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18  9:24 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-18 11:12:10 [+0200], To Dave Martin wrote:
> > > -	if (may_use_simd()) {
> > > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
> > 
> > I suspect this is wrong -- see comments on the commit message.

I'm sorry, I pressed send too early, I was aiming for the draft folder.
So yes, based on that EFI that might be interruptible, let me try to
look at the initial issue again and maybe I get another idea how to deal
with this.
One question: If EFI is interruptible that means, we call into EFI - how
do we get out? Does EFI enable interrupts and the kernel receives an
interrupt and treats this EFI call like a regular context switch?

> > >  		kernel_neon_begin();
> > >  	} else {
> > 
> > [...]
> > 
> > Cheers
> > ---Dave

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-13 22:03                   ` Mike Galbraith
@ 2018-07-18  9:27                     ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18  9:27 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Steven Rostedt, linux-rt-users, linux-kernel, tglx,
	Catalin Marinas, Will Deacon, linux-arm-kernel

On 2018-07-14 00:03:44 [+0200], Mike Galbraith wrote:
> > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > EFI so I have no clue if saving SIMD while calling to EFI works.
> 
> All is not well on cavium test box.  I'm seeing random errors ala...
> 
> ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> 
> ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> as well, which is unsurprising if it's related to fpsimd woes.  Box
> does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> 
> To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
> containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
> the problem. (relevant? dunno, it may be unrelated to fpsimd.c).

Okay, so you did not test this because you can't compile. But the
"internal compiler error" is usually something the gcc folks are
interested in :)

> 	-Mike

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18  9:27                     ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18  9:27 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-14 00:03:44 [+0200], Mike Galbraith wrote:
> > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > EFI so I have no clue if saving SIMD while calling to EFI works.
> 
> All is not well on cavium test box.  I'm seeing random errors ala...
> 
> ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> 
> ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> as well, which is unsurprising if it's related to fpsimd woes.  Box
> does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> 
> To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
> containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
> the problem. (relevant? dunno, it may be unrelated to fpsimd.c).

Okay, so you did not test this because you can't compile. But the
"internal compiler error" is usually something the gcc folks are
interested in :)

> 	-Mike

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-18  9:27                     ` Sebastian Andrzej Siewior
  (?)
@ 2018-07-18 10:28                       ` Mike Galbraith
  -1 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-18 10:28 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Steven Rostedt, linux-rt-users, linux-kernel, tglx,
	Catalin Marinas, Will Deacon, linux-arm-kernel

On Wed, 2018-07-18 at 11:27 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-14 00:03:44 [+0200], Mike Galbraith wrote:
> > > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> > All is not well on cavium test box.  I'm seeing random errors ala...
> > 
> > ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> > ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> > 
> > ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> > as well, which is unsurprising if it's related to fpsimd woes.  Box
> > does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> > 
> > To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
> > containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
> > the problem. (relevant? dunno, it may be unrelated to fpsimd.c).
> 
> Okay, so you did not test this because you can't compile.

Nope, the running kernel, the one that is doing the segfaulting etc,
has the patches applied.

It is exhibiting that symptom because those patches do not cure this
symptom, one which I verified to be present in virgin 4.14-rt as well. 
The pseudo-patch I sent, disabling preemption where it is assumed to be
disabled instead, does cure it.  With preemption so disabled, I can
beat on affected kernels (>=4.14-rt) as long as I like.

This particular 48 core Cavium is very slow, maybe that makes it easier
to reproduce, dunno.  According to pipe-test, the thing is essentially
a dozen RPi super-glued together.  pipe-test pinned to a single core
can only context switch at ~40KHz with PREEMPT_RT, or ~90 with
NOPREEMPT, comparable to measurement done in real deal RPi.

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18 10:28                       ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-18 10:28 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel,
	Steven Rostedt, tglx, linux-arm-kernel

On Wed, 2018-07-18 at 11:27 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-14 00:03:44 [+0200], Mike Galbraith wrote:
> > > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> > All is not well on cavium test box.  I'm seeing random errors ala...
> > 
> > ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> > ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> > 
> > ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> > as well, which is unsurprising if it's related to fpsimd woes.  Box
> > does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> > 
> > To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
> > containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
> > the problem. (relevant? dunno, it may be unrelated to fpsimd.c).
> 
> Okay, so you did not test this because you can't compile.

Nope, the running kernel, the one that is doing the segfaulting etc,
has the patches applied.

It is exhibiting that symptom because those patches do not cure this
symptom, one which I verified to be present in virgin 4.14-rt as well. 
The pseudo-patch I sent, disabling preemption where it is assumed to be
disabled instead, does cure it.  With preemption so disabled, I can
beat on affected kernels (>=4.14-rt) as long as I like.

This particular 48 core Cavium is very slow, maybe that makes it easier
to reproduce, dunno.  According to pipe-test, the thing is essentially
a dozen RPi super-glued together.  pipe-test pinned to a single core
can only context switch at ~40KHz with PREEMPT_RT, or ~90 with
NOPREEMPT, comparable to measurement done in real deal RPi.

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18 10:28                       ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-18 10:28 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2018-07-18 at 11:27 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-14 00:03:44 [+0200], Mike Galbraith wrote:
> > > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> > All is not well on cavium test box.  I'm seeing random errors ala...
> > 
> > ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> > ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> > 
> > ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> > as well, which is unsurprising if it's related to fpsimd woes.  Box
> > does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> > 
> > To file under FWIW, arm64 configured SLE15-RT, 4.12 based kernel
> > containing virgin @stable arch/arm64/kernel/fpsimd.c, does not exhibit
> > the problem. (relevant? dunno, it may be unrelated to fpsimd.c).
> 
> Okay, so you did not test this because you can't compile.

Nope, the running kernel, the one that is doing the segfaulting etc,
has the patches applied.

It is exhibiting that symptom because those patches do not cure this
symptom, one which I verified to be present in virgin 4.14-rt as well. 
The pseudo-patch I sent, disabling preemption where it is assumed to be
disabled instead, does cure it.  With preemption so disabled, I can
beat on affected kernels (>=4.14-rt) as long as I like.

This particular 48 core Cavium is very slow, maybe that makes it easier
to reproduce, dunno.  According to pipe-test, the thing is essentially
a dozen RPi super-glued together.  pipe-test pinned to a single core
can only context switch at ~40KHz with PREEMPT_RT, or ~90 with
NOPREEMPT, comparable to measurement done in real deal RPi.

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-15  7:22                     ` Mike Galbraith
  (?)
@ 2018-07-18 10:30                       ` Mike Galbraith
  -1 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-18 10:30 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Steven Rostedt
  Cc: linux-rt-users, linux-kernel, tglx, Catalin Marinas, Will Deacon,
	linux-arm-kernel

See pseudo-patch below.  That cures the reported gcc gripeage.

On Sun, 2018-07-15 at 09:22 +0200, Mike Galbraith wrote:
> On Sat, 2018-07-14 at 00:03 +0200, Mike Galbraith wrote:
> > On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> > > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > > code disables BH and expects that it is not preemptible. On -RT the
> > > task remains preemptible but remains the same CPU. This may corrupt the
> > > content of the SIMD registers if the task is preempted during
> > > saving/restoring those registers.
> > > Add a locallock around this process. This avoids any function
> > > within the locallock block being invoked more than once on the same CPU.
> > > 
> > > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > > state of registers used for in-kernel-work. We would require additional storage
> > > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > > the need-resched flag so it shouldn't be that bad.
> > > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > > registers. Unfortunately we have to balance out the migrate_disable() counter
> > > because local_lock_bh() is invoked in different context compared to its unlock
> > > counterpart.
> > > 
> > > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> > > preempt_disable() context and instead save the registers always in its
> > > extra spot on RT.
> > > 
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > ---
> > > 
> > > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> > All is not well on cavium test box.  I'm seeing random errors ala...
> > 
> > ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> > ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> > 
> > ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> > as well, which is unsurprising if it's related to fpsimd woes.  Box
> > does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> 
> Verified to be SIMD woes.  I backported your V2 to 4.14-rt, and the
> CPUS*2 kbuild still reliably reproduced the corruption issue.  I then
> did the below to both 4.14-rt and 4.16-rt, and the corruption is gone.
> 
> (this looks a bit like a patch, but is actually a functional yellow
> sticky should I need to come back for another poke at it later)
> 
> arm64: fpsimd: disable preemption for RT where that is assumed
> 
> 1. Per Sebastian's analysis, kernel_neon_begin() can't be made preemptible:
> If the task-switch notices TIF_FOREIGN_FPSTATE then it would restore task's
> SIMD state and we lose the state of registers used for in-kernel-work.  We
> would require additional storage for the in-kernel copy of the registers.
> But then the NEON-crypto checks for the need-resched flag so it shouldn't
> be that bad.
> 
> 2. arch_efi_call_virt_setup/teardown() encapsulate __efi_fpsimd_begin/end()
> in preempt disabled sections via efi_virtmap_load/unload().  That could be
> fixed, but... 
> 
> 3. A local lock solution which left preempt disabled sections 1 & 2 intact
> failed, CPUS*2 parallel kbuild reliably reproduced memory corruption.
> 
> Given the two non-preemptible sections which could encapsulate something
> painful remained intact with the local lock solution, and the fact that
> the remaining BH disabled sections are all small, with empirical evidence
> at hand that at LEAST one truly does require preemption be disabled,
> the best solution for both RT and !RT is to simply disable preemption for
> RT where !RT assumes preemption has been disabled.  That adds no cycles
> to the !RT case, fewer cycles to the RT case, and requires no (ugly) work
> around for the consequences of local_unlock() under preempt_disable().
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>
> ---
>  arch/arm64/kernel/fpsimd.c |   18 +++++++++++++++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -594,6 +594,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		preempt_disable_rt();
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +605,10 @@ int sve_set_vector_length(struct task_st
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
>  		local_bh_enable();
> +		preempt_enable_rt();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	task_fpsimd_save();
> @@ -850,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -925,6 +930,7 @@ void fpsimd_flush_thread(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
> @@ -968,6 +974,7 @@ void fpsimd_flush_thread(void)
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -979,9 +986,11 @@ void fpsimd_preserve_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  	task_fpsimd_save();
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1021,6 +1030,7 @@ void fpsimd_restore_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> @@ -1029,6 +1039,7 @@ void fpsimd_restore_current_state(void)
>  	}
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1041,6 +1052,7 @@ void fpsimd_update_current_state(struct
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
> @@ -1053,6 +1065,7 @@ void fpsimd_update_current_state(struct
>  		fpsimd_bind_to_cpu();
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1115,6 +1128,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> +	preempt_disable();
>  	local_bh_disable();
>  
>  	__this_cpu_write(kernel_neon_busy, true);
> @@ -1128,8 +1142,6 @@ void kernel_neon_begin(void)
>  	/* Invalidate any task state remaining in the fpsimd regs: */
>  	fpsimd_flush_cpu_state();
>  
> -	preempt_disable();
> -
>  	local_bh_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_begin);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18 10:30                       ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-18 10:30 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Steven Rostedt
  Cc: linux-rt-users, Catalin Marinas, Will Deacon, linux-kernel, tglx,
	linux-arm-kernel

See pseudo-patch below.  That cures the reported gcc gripeage.

On Sun, 2018-07-15 at 09:22 +0200, Mike Galbraith wrote:
> On Sat, 2018-07-14 at 00:03 +0200, Mike Galbraith wrote:
> > On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> > > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > > code disables BH and expects that it is not preemptible. On -RT the
> > > task remains preemptible but remains the same CPU. This may corrupt the
> > > content of the SIMD registers if the task is preempted during
> > > saving/restoring those registers.
> > > Add a locallock around this process. This avoids any function
> > > within the locallock block being invoked more than once on the same CPU.
> > > 
> > > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > > state of registers used for in-kernel-work. We would require additional storage
> > > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > > the need-resched flag so it shouldn't be that bad.
> > > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > > registers. Unfortunately we have to balance out the migrate_disable() counter
> > > because local_lock_bh() is invoked in different context compared to its unlock
> > > counterpart.
> > > 
> > > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> > > preempt_disable() context and instead save the registers always in its
> > > extra spot on RT.
> > > 
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > ---
> > > 
> > > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> > All is not well on cavium test box.  I'm seeing random errors ala...
> > 
> > ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> > ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> > 
> > ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> > as well, which is unsurprising if it's related to fpsimd woes.  Box
> > does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> 
> Verified to be SIMD woes.  I backported your V2 to 4.14-rt, and the
> CPUS*2 kbuild still reliably reproduced the corruption issue.  I then
> did the below to both 4.14-rt and 4.16-rt, and the corruption is gone.
> 
> (this looks a bit like a patch, but is actually a functional yellow
> sticky should I need to come back for another poke at it later)
> 
> arm64: fpsimd: disable preemption for RT where that is assumed
> 
> 1. Per Sebastian's analysis, kernel_neon_begin() can't be made preemptible:
> If the task-switch notices TIF_FOREIGN_FPSTATE then it would restore task's
> SIMD state and we lose the state of registers used for in-kernel-work.  We
> would require additional storage for the in-kernel copy of the registers.
> But then the NEON-crypto checks for the need-resched flag so it shouldn't
> be that bad.
> 
> 2. arch_efi_call_virt_setup/teardown() encapsulate __efi_fpsimd_begin/end()
> in preempt disabled sections via efi_virtmap_load/unload().  That could be
> fixed, but... 
> 
> 3. A local lock solution which left preempt disabled sections 1 & 2 intact
> failed, CPUS*2 parallel kbuild reliably reproduced memory corruption.
> 
> Given the two non-preemptible sections which could encapsulate something
> painful remained intact with the local lock solution, and the fact that
> the remaining BH disabled sections are all small, with empirical evidence
> at hand that at LEAST one truly does require preemption be disabled,
> the best solution for both RT and !RT is to simply disable preemption for
> RT where !RT assumes preemption has been disabled.  That adds no cycles
> to the !RT case, fewer cycles to the RT case, and requires no (ugly) work
> around for the consequences of local_unlock() under preempt_disable().
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>
> ---
>  arch/arm64/kernel/fpsimd.c |   18 +++++++++++++++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -594,6 +594,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		preempt_disable_rt();
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +605,10 @@ int sve_set_vector_length(struct task_st
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
>  		local_bh_enable();
> +		preempt_enable_rt();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	task_fpsimd_save();
> @@ -850,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -925,6 +930,7 @@ void fpsimd_flush_thread(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
> @@ -968,6 +974,7 @@ void fpsimd_flush_thread(void)
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -979,9 +986,11 @@ void fpsimd_preserve_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  	task_fpsimd_save();
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1021,6 +1030,7 @@ void fpsimd_restore_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> @@ -1029,6 +1039,7 @@ void fpsimd_restore_current_state(void)
>  	}
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1041,6 +1052,7 @@ void fpsimd_update_current_state(struct
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
> @@ -1053,6 +1065,7 @@ void fpsimd_update_current_state(struct
>  		fpsimd_bind_to_cpu();
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1115,6 +1128,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> +	preempt_disable();
>  	local_bh_disable();
>  
>  	__this_cpu_write(kernel_neon_busy, true);
> @@ -1128,8 +1142,6 @@ void kernel_neon_begin(void)
>  	/* Invalidate any task state remaining in the fpsimd regs: */
>  	fpsimd_flush_cpu_state();
>  
> -	preempt_disable();
> -
>  	local_bh_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_begin);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18 10:30                       ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-18 10:30 UTC (permalink / raw)
  To: linux-arm-kernel

See pseudo-patch below.  That cures the reported gcc gripeage.

On Sun, 2018-07-15 at 09:22 +0200, Mike Galbraith wrote:
> On Sat, 2018-07-14 at 00:03 +0200, Mike Galbraith wrote:
> > On Fri, 2018-07-13 at 19:49 +0200, Sebastian Andrzej Siewior wrote:
> > > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > > code disables BH and expects that it is not preemptible. On -RT the
> > > task remains preemptible but remains the same CPU. This may corrupt the
> > > content of the SIMD registers if the task is preempted during
> > > saving/restoring those registers.
> > > Add a locallock around this process. This avoids any function
> > > within the locallock block being invoked more than once on the same CPU.
> > > 
> > > The kernel_neon_begin() can't be kept preemptible. If the task-switch notices
> > > TIF_FOREIGN_FPSTATE then it would restore task's SIMD state and we lose the
> > > state of registers used for in-kernel-work. We would require additional storage
> > > for the in-kernel copy of the registers. But then the NEON-crypto checks for
> > > the need-resched flag so it shouldn't be that bad.
> > > The preempt_disable() avoids the context switch while the kernel uses the SIMD
> > > registers. Unfortunately we have to balance out the migrate_disable() counter
> > > because local_lock_bh() is invoked in different context compared to its unlock
> > > counterpart.
> > > 
> > > __efi_fpsimd_begin() should not use kernel_fpu_begin() due to its
> > > preempt_disable() context and instead save the registers always in its
> > > extra spot on RT.
> > > 
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > ---
> > > 
> > > This seems to make work (crypto chacha20-neon + cyclictest). I have no
> > > EFI so I have no clue if saving SIMD while calling to EFI works.
> > 
> > All is not well on cavium test box.  I'm seeing random errors ala...
> > 
> > ./include/linux/fs.h:3137:11: internal compiler error: Segmentation fault
> > ./include/linux/bio.h:175:1: internal compiler error: in grokdeclarator, at c/c-decl.c:7023
> > 
> > ...during make -j96 (2*cpus) kbuild.  Turns out 4.14-rt has this issue
> > as well, which is unsurprising if it's related to fpsimd woes.  Box
> > does not exhibit the issue with NONRT kernels, PREEMPT or NOPREEMPT.
> 
> Verified to be SIMD woes.  I backported your V2 to 4.14-rt, and the
> CPUS*2 kbuild still reliably reproduced the corruption issue.  I then
> did the below to both 4.14-rt and 4.16-rt, and the corruption is gone.
> 
> (this looks a bit like a patch, but is actually a functional yellow
> sticky should I need to come back for another poke at it later)
> 
> arm64: fpsimd: disable preemption for RT where that is assumed
> 
> 1. Per Sebastian's analysis, kernel_neon_begin() can't be made preemptible:
> If the task-switch notices TIF_FOREIGN_FPSTATE then it would restore task's
> SIMD state and we lose the state of registers used for in-kernel-work.  We
> would require additional storage for the in-kernel copy of the registers.
> But then the NEON-crypto checks for the need-resched flag so it shouldn't
> be that bad.
> 
> 2. arch_efi_call_virt_setup/teardown() encapsulate __efi_fpsimd_begin/end()
> in preempt disabled sections via efi_virtmap_load/unload().  That could be
> fixed, but... 
> 
> 3. A local lock solution which left preempt disabled sections 1 & 2 intact
> failed, CPUS*2 parallel kbuild reliably reproduced memory corruption.
> 
> Given the two non-preemptible sections which could encapsulate something
> painful remained intact with the local lock solution, and the fact that
> the remaining BH disabled sections are all small, with empirical evidence
> at hand that at LEAST one truly does require preemption be disabled,
> the best solution for both RT and !RT is to simply disable preemption for
> RT where !RT assumes preemption has been disabled.  That adds no cycles
> to the !RT case, fewer cycles to the RT case, and requires no (ugly) work
> around for the consequences of local_unlock() under preempt_disable().
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>
> ---
>  arch/arm64/kernel/fpsimd.c |   18 +++++++++++++++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -594,6 +594,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		preempt_disable_rt();
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +605,10 @@ int sve_set_vector_length(struct task_st
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
>  		local_bh_enable();
> +		preempt_enable_rt();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	task_fpsimd_save();
> @@ -850,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int
>  		WARN_ON(1); /* SVE access shouldn't have trapped */
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -925,6 +930,7 @@ void fpsimd_flush_thread(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
> @@ -968,6 +974,7 @@ void fpsimd_flush_thread(void)
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -979,9 +986,11 @@ void fpsimd_preserve_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  	task_fpsimd_save();
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1021,6 +1030,7 @@ void fpsimd_restore_current_state(void)
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
> @@ -1029,6 +1039,7 @@ void fpsimd_restore_current_state(void)
>  	}
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1041,6 +1052,7 @@ void fpsimd_update_current_state(struct
>  	if (!system_supports_fpsimd())
>  		return;
>  
> +	preempt_disable_rt();
>  	local_bh_disable();
>  
>  	current->thread.fpsimd_state.user_fpsimd = *state;
> @@ -1053,6 +1065,7 @@ void fpsimd_update_current_state(struct
>  		fpsimd_bind_to_cpu();
>  
>  	local_bh_enable();
> +	preempt_enable_rt();
>  }
>  
>  /*
> @@ -1115,6 +1128,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> +	preempt_disable();
>  	local_bh_disable();
>  
>  	__this_cpu_write(kernel_neon_busy, true);
> @@ -1128,8 +1142,6 @@ void kernel_neon_begin(void)
>  	/* Invalidate any task state remaining in the fpsimd regs: */
>  	fpsimd_flush_cpu_state();
>  
> -	preempt_disable();
> -
>  	local_bh_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_begin);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-18 10:28                       ` Mike Galbraith
@ 2018-07-18 10:36                         ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18 10:36 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Steven Rostedt, linux-rt-users, linux-kernel, tglx,
	Catalin Marinas, Will Deacon, linux-arm-kernel

On 2018-07-18 12:28:48 [+0200], Mike Galbraith wrote:
> > Okay, so you did not test this because you can't compile.
> 
> Nope, the running kernel, the one that is doing the segfaulting etc,
> has the patches applied.
> 
> It is exhibiting that symptom because those patches do not cure this
> symptom, one which I verified to be present in virgin 4.14-rt as well. 
> The pseudo-patch I sent, disabling preemption where it is assumed to be
> disabled instead, does cure it.  With preemption so disabled, I can
> beat on affected kernels (>=4.14-rt) as long as I like.

ah. so gcc shows the problem instead gcc explodes with the patch
applied. Okay. Let me stare at this a little more…

> This particular 48 core Cavium is very slow, maybe that makes it easier
> to reproduce, dunno.  According to pipe-test, the thing is essentially
> a dozen RPi super-glued together.  pipe-test pinned to a single core
> can only context switch at ~40KHz with PREEMPT_RT, or ~90 with
> NOPREEMPT, comparable to measurement done in real deal RPi.
> 
> 	-Mike

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-18 10:36                         ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-18 10:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-18 12:28:48 [+0200], Mike Galbraith wrote:
> > Okay, so you did not test this because you can't compile.
> 
> Nope, the running kernel, the one that is doing the segfaulting etc,
> has the patches applied.
> 
> It is exhibiting that symptom because those patches do not cure this
> symptom, one which I verified to be present in virgin 4.14-rt as well. 
> The pseudo-patch I sent, disabling preemption where it is assumed to be
> disabled instead, does cure it.  With preemption so disabled, I can
> beat on affected kernels (>=4.14-rt) as long as I like.

ah. so gcc shows the problem instead gcc explodes with the patch
applied. Okay. Let me stare at this a little more…

> This particular 48 core Cavium is very slow, maybe that makes it easier
> to reproduce, dunno.  According to pipe-test, the thing is essentially
> a dozen RPi super-glued together.  pipe-test pinned to a single core
> can only context switch at ~40KHz with PREEMPT_RT, or ~90 with
> NOPREEMPT, comparable to measurement done in real deal RPi.
> 
> 	-Mike

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-18  9:12                   ` Sebastian Andrzej Siewior
@ 2018-07-24 13:46                       ` Steven Rostedt
  2018-07-24 13:46                       ` Steven Rostedt
  1 sibling, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-07-24 13:46 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Dave Martin, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On Wed, 18 Jul 2018 11:12:10 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> > > @@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
> > >  
> > >  	BUG_ON(!may_use_simd());
> > >  
> > > -	local_bh_disable();
> > > +	local_lock_bh(fpsimd_lock);
> > >  
> > >  	__this_cpu_write(kernel_neon_busy, true);
> > >  
> > > @@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
> > >  	fpsimd_flush_cpu_state();
> > >  
> > >  	preempt_disable();
> > > -
> > > -	local_bh_enable();
> > > +	/*
> > > +	 * ballance atomic vs !atomic context of migrate_disable().
> > > +	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
> > > +	 */
> > > +	migrate_disable();
> > > +	migrate_disable();
> > > +	migrate_disable();
> > > +	local_unlock_bh(fpsimd_lock);  
> > 
> > This looks fragile.
> > 
> > Do we really need to do it 3 times?  
> 
> Unfortunately yes.

Then we need to find another solution, because this is way too ugly and
as Dave said, fragile to keep.


> 
> > Showing my ignorance here, but I'd naively expect that the migrate-
> > disableness changes as follows:
> > 
> > 	/* 0 */
> > 	local_lock_bh(); /* +3 */
> > 
> > 	...
> > 
> > 	preempt_disable(); /* +3 */
> > 
> > 	migrate_disable(); /* +4 */
> > 	migrate_disable(); /* +5 */
> > 	migrate_disable(); /* +6 */
> > 
> > 	local_unlock_bh(); /* +3 */
> > 
> > 
> > If so, I don't understand why it's not OK to call migrate_disable()
> > just once, leaving the count at +1  (?)
> > 
> > I'm making assumptions about the relationship between preempt_disable()
> > and migrate_disable() here.  
> 
> Exactly. So local_lock_bh() does +3 which I try to explain why it is 3.

How does local_lock_bh() do a +3 (not seeing it in the code). And
leaking internal implementation of local_lock_bh() into other parts of
the kernel is a no no.

> Now migrate_disable() has two counters: One for atomic context (irq or
> preemption disabled) and on for non-atomic context. The difference is
> that in atomic context migrate_disable() does nothing and in !atomic
> context it does other things which can't be done in atomic context
> anyway (I can explain this in full detail but you may lose interest so I
> keep it short). To update your example:
> 
> 	/* ATOMIC COUNTER | non-ATOMIC COUNTER  | change */
> 			 /* 0 | 0 | - */
> 	local_lock_bh(); /* 0 | 3 | N+3 */
>  
>  	...
>  
> 	preempt_disable(); /* atomic context */
>  
> 	migrate_disable(); /* 1 | 3 | A+1 */
> 	migrate_disable(); /* 2 | 3 | A+1 */
> 	migrate_disable(); /* 3 | 3 | A+1 */
>  
> 	local_unlock_bh(); /* 0 | 3 | A-3 */
> 
> /* later */
> 	preempt_enable();  /* non-atomic context */
> 	migrate_enable();  /* 0 | 2 | N-1 */
> 	migrate_enable();  /* 0 | 1 | N-1 */
> 	migrate_enable();  /* 0 | 0 | N-1 */

If anything, this needs a wrapper. local_lock_preempt_fixup() ?

-- Steve

> 
> > >  }
> > >  EXPORT_SYMBOL(kernel_neon_begin);
> > >  
> > > @@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
> > >  	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
> > >  
> > >  	preempt_enable();
> > > +	/* balance migrate_disable(). See kernel_neon_begin() */
> > > +	migrate_enable();
> > > +	migrate_enable();
> > > +	migrate_enable();
> > >  }
> > >  EXPORT_SYMBOL(kernel_neon_end);
> > >  
> > > @@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
> > >  	if (!system_supports_fpsimd())
> > >  		return;
> > >  
> > > -	WARN_ON(preemptible());
> > > -
> > > -	if (may_use_simd()) {
> > > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {  
> > 
> > I suspect this is wrong -- see comments on the commit message.
> >   
> > >  		kernel_neon_begin();
> > >  	} else {  
> > 
> > [...]
> > 
> > Cheers
> > ---Dave  


^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-24 13:46                       ` Steven Rostedt
  0 siblings, 0 replies; 69+ messages in thread
From: Steven Rostedt @ 2018-07-24 13:46 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 18 Jul 2018 11:12:10 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> > > @@ -1115,7 +1116,7 @@ void kernel_neon_begin(void)
> > >  
> > >  	BUG_ON(!may_use_simd());
> > >  
> > > -	local_bh_disable();
> > > +	local_lock_bh(fpsimd_lock);
> > >  
> > >  	__this_cpu_write(kernel_neon_busy, true);
> > >  
> > > @@ -1129,8 +1130,14 @@ void kernel_neon_begin(void)
> > >  	fpsimd_flush_cpu_state();
> > >  
> > >  	preempt_disable();
> > > -
> > > -	local_bh_enable();
> > > +	/*
> > > > +	 * balance atomic vs !atomic context of migrate_disable().
> > > +	 * local_lock_bh = get_local_var() + spin_lock_bh (2x migrate_disable)
> > > +	 */
> > > +	migrate_disable();
> > > +	migrate_disable();
> > > +	migrate_disable();
> > > +	local_unlock_bh(fpsimd_lock);  
> > 
> > This looks fragile.
> > 
> > Do we really need to do it 3 times?  
> 
> Unfortunately yes.

Then we need to find another solution, because this is way too ugly and
as Dave said, fragile to keep.


> 
> > Showing my ignorance here, but I'd naively expect that the migrate-
> > disableness changes as follows:
> > 
> > 	/* 0 */
> > 	local_lock_bh(); /* +3 */
> > 
> > 	...
> > 
> > 	preempt_disable(); /* +3 */
> > 
> > 	migrate_disable(); /* +4 */
> > 	migrate_disable(); /* +5 */
> > 	migrate_disable(); /* +6 */
> > 
> > 	local_unlock_bh(); /* +3 */
> > 
> > 
> > If so, I don't understand why it's not OK to call migrate_disable()
> > just once, leaving the count at +1  (?)
> > 
> > I'm making assumptions about the relationship between preempt_disable()
> > and migrate_disable() here.  
> 
> Exactly. So local_lock_bh() does +3 which I try to explain why it is 3.

How does local_lock_bh() do a +3 (not seeing it in the code). And
leaking internal implementation of local_lock_bh() into other parts of
the kernel is a no no.

> Now migrate_disable() has two counters: One for atomic context (irq or
> preemption disabled) and one for non-atomic context. The difference is
> that in atomic context migrate_disable() does nothing and in !atomic
> context it does other things which can't be done in atomic context
> anyway (I can explain this in full detail but you may lose interest so I
> keep it short). To update your example:
> 
> 	/* ATOMIC COUNTER | non-ATOMIC COUNTER  | change */
> 			 /* 0 | 0 | - */
> 	local_lock_bh(); /* 0 | 3 | N+3 */
>  
>  	...
>  
> 	preempt_disable(); /* atomic context */
>  
> 	migrate_disable(); /* 1 | 3 | A+1 */
> 	migrate_disable(); /* 2 | 3 | A+1 */
> 	migrate_disable(); /* 3 | 3 | A+1 */
>  
> 	local_unlock_bh(); /* 0 | 3 | A-3 */
> 
> /* later */
> 	preempt_enable();  /* non-atomic context */
> 	migrate_enable();  /* 0 | 2 | N-1 */
> 	migrate_enable();  /* 0 | 1 | N-1 */
> 	migrate_enable();  /* 0 | 0 | N-1 */

If anything, this needs a wrapper. local_lock_preempt_fixup() ?

-- Steve

> 
> > >  }
> > >  EXPORT_SYMBOL(kernel_neon_begin);
> > >  
> > > @@ -1154,6 +1161,10 @@ void kernel_neon_end(void)
> > >  	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
> > >  
> > >  	preempt_enable();
> > > +	/* balance migrate_disable(). See kernel_neon_begin() */
> > > +	migrate_enable();
> > > +	migrate_enable();
> > > +	migrate_enable();
> > >  }
> > >  EXPORT_SYMBOL(kernel_neon_end);
> > >  
> > > @@ -1185,9 +1196,7 @@ void __efi_fpsimd_begin(void)
> > >  	if (!system_supports_fpsimd())
> > >  		return;
> > >  
> > > -	WARN_ON(preemptible());
> > > -
> > > -	if (may_use_simd()) {
> > > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {  
> > 
> > I suspect this is wrong -- see comments on the commit message.
> >   
> > >  		kernel_neon_begin();
> > >  	} else {  
> > 
> > [...]
> > 
> > Cheers
> > ---Dave  

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-24 13:46                       ` Steven Rostedt
@ 2018-07-24 13:57                         ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-24 13:57 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Dave Martin, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On 2018-07-24 09:46:23 [-0400], Steven Rostedt wrote:
> > Unfortunately yes.
> 
> Then we need to find another solution, because this is way too ugly and
> as Dave said, fragile to keep.

Yes. I have something new where Mike said it works (while this causes
Mike's gcc to segfault). Need to test this myself…

> How does local_lock_bh() do a +3 (not seeing it in the code). And

get_local_var() +1
spin_lock_bh() +2 because
	local_bh_disable() +1
	spin_lock() +1

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-24 13:57                         ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-24 13:57 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-24 09:46:23 [-0400], Steven Rostedt wrote:
> > Unfortunately yes.
> 
> Then we need to find another solution, because this is way too ugly and
> as Dave said, fragile to keep.

Yes. I have something new where Mike said it works (while this causes
Mike's gcc to segfault). Need to test this myself…

> How does local_lock_bh() do a +3 (not seeing it in the code). And

get_local_var() +1
spin_lock_bh() +2 because
	local_bh_disable() +1
	spin_lock() +1

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-18  9:24                       ` Sebastian Andrzej Siewior
@ 2018-07-24 14:45                         ` Dave Martin
  -1 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-07-24 14:45 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Catalin Marinas, Mike Galbraith, Will Deacon,
	linux-kernel, Steven Rostedt, tglx, linux-arm-kernel

On Wed, Jul 18, 2018 at 11:24:48AM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-18 11:12:10 [+0200], To Dave Martin wrote:
> > > > -	if (may_use_simd()) {
> > > > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
> > > 
> > > I suspect this is wrong -- see comments on the commit message.
> 
> I'm sorry, I pressed send too early, I was aiming for the draft folder.
> So yes, based on that EFI that might be interruptible, let me try to
> look at the initial issue again and maybe I get another idea how to deal
> with this.
> One question: If EFI is interruptible that means, we call into EFI - how
> do we get out? Does EFI enable interrupts and the kernel receives an
> interrupt and treats this EFI call like a regular context switch?

AFAIK the only safe way to get out permanently is for the call to
return.  Note, I've not gone through the spec in fine detail myself.

The OS may handle interrupts occurring during the EFI call, but we
still have to return to EFI afterwards to finish off the call.  From
the Linux perspective, I think this means that EFI calls are non-
preemptible.

Under RT, I'm pretty sure that we can't safely resume the interrupted
EFI call on a different cpu from the one it was interrupted on.  Even
if it doesn't say this explicitly in the UEFI spec, I think it will be
assumed in implementations.


Certain EFI calls are not long-running and may need to be called from
interrupt context in Linux, which means that there may be live kernel-
mode NEON state.  This is why there are separate FPSIMD/SVE percpu stash
buffers for EFI specifically.

Does this make sense?  It is probably not very clear, but I'm trying
to hide the fact that I haven't looked at the UEFI spec for ages...

Cheers
---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-24 14:45                         ` Dave Martin
  0 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-07-24 14:45 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jul 18, 2018 at 11:24:48AM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-18 11:12:10 [+0200], To Dave Martin wrote:
> > > > -	if (may_use_simd()) {
> > > > +	if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
> > > 
> > > I suspect this is wrong -- see comments on the commit message.
> 
> I'm sorry, I pressed send too early, I was aiming for the draft folder.
> So yes, based on that EFI that might be interruptible, let me try to
> look at the initial issue again and maybe I get another idea how to deal
> with this.
> One question: If EFI is interruptible that means, we call into EFI - how
> do we get out? Does EFI enable interrupts and the kernel receives an
> interrupt and treats this EFI call like a regular context switch?

AFAIK the only safe way to get out permanently is for the call to
return.  Note, I've not gone through the spec in fine detail myself.

The OS may handle interrupts occurring during the EFI call, but we
still have to return to EFI afterwards to finish off the call.  From
the Linux perspective, I think this means that EFI calls are non-
preemptible.

Under RT, I'm pretty sure that we can't safely resume the interrupted
EFI call on a different cpu from the one it was interrupted on.  Even
if it doesn't say this explicitly in the UEFI spec, I think it will be
assumed in implementations.


Certain EFI calls are not long-running and may need to be called from
interrupt context in Linux, which means that there may be live kernel-
mode NEON state.  This is why there are separate FPSIMD/SVE percpu stash
buffers for EFI specifically.

Does this make sense?  It is probably not very clear, but I'm trying
to hide the fact that I haven't looked at the UEFI spec for ages...

Cheers
---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
  2018-07-24 14:45                         ` Dave Martin
@ 2018-07-24 15:15                           ` Ard Biesheuvel
  -1 siblings, 0 replies; 69+ messages in thread
From: Ard Biesheuvel @ 2018-07-24 15:15 UTC (permalink / raw)
  To: Dave Martin
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Catalin Marinas,
	Mike Galbraith, Will Deacon, Linux Kernel Mailing List,
	Steven Rostedt, Thomas Gleixner, linux-arm-kernel

On 24 July 2018 at 16:45, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Jul 18, 2018 at 11:24:48AM +0200, Sebastian Andrzej Siewior wrote:
>> On 2018-07-18 11:12:10 [+0200], To Dave Martin wrote:
>> > > > -       if (may_use_simd()) {
>> > > > +       if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
>> > >
>> > > I suspect this is wrong -- see comments on the commit message.
>>
>> I'm sorry, I pressed send too early, I was aiming for the draft folder.
>> So yes, based on that EFI that might be interruptible, let me try to
>> look at the initial issue again and maybe I get another idea how to deal
>> with this.
>> One question: If EFI is interruptible that means, we call into EFI - how
>> do we get out? Does EFI enable interrupts and the kernel receives an
>> interrupt and treats this EFI call like a regular context switch?
>
> AFAIK the only safe way to get out permanently is for the call to
> return.  Note, I've not gone through the spec in fine detail myself.
>
> The OS may handle interrupts occurring during the EFI call, but we
> still have to return to EFI afterwards to finish off the call.  From
> the Linux perspective, I think this means that EFI calls are non-
> preemptible.
>
> Under RT, I'm pretty sure that we can't safely resume the interrupted
> EFI call on a different cpu from the one it was interrupted on.  Even
> if it doesn't say this explicitly in the UEFI spec, I think it will be
> assumed in implementations.
>

This is a can of worms I would rather not open, although I don't think
the UEFI spec makes it explicit that you cannot migrate runtime
service calls while in progress.

Also, I don't think EFI calls are worth obsessing about, given that
they shouldn't be that common under normal operation. I know that RT
is not about the common case but about the worst case, though. What
problem is migrating a non-preemptible task intended to solve?

>
> Certain EFI calls are not long-running and may need to be called from
> interrupt context in Linux,

This suggests that the need to be called in interrupt context is a
property of the firmware implementation but it is not.

In the kernel, we have efi-pstore which may attempt to invoke runtime
services to record the kernel's dying gasp into a EFI variable. Other
than that, I don't think there are any reasons to call EFI services
from non-process context.

> which means that there may be live kernel-
> mode NEON state.  This is why there are separate FPSIMD/SVE percpu stash
> buffers for EFI specifically.
>

So given the above, and the fact that you can panic() in an interrupt
handler, we needed those buffers, although I wonder if we'd ever reach
the point where we end up resuming execution of the code that uses the
restored contents of those buffers.

> Does this make sense?  It is probably not very clear, but I'm trying
> to hide the fact that I haven't looked at the UEFI spec for ages...
>
> Cheers
> ---Dave
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable()
@ 2018-07-24 15:15                           ` Ard Biesheuvel
  0 siblings, 0 replies; 69+ messages in thread
From: Ard Biesheuvel @ 2018-07-24 15:15 UTC (permalink / raw)
  To: linux-arm-kernel

On 24 July 2018 at 16:45, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Jul 18, 2018 at 11:24:48AM +0200, Sebastian Andrzej Siewior wrote:
>> On 2018-07-18 11:12:10 [+0200], To Dave Martin wrote:
>> > > > -       if (may_use_simd()) {
>> > > > +       if (!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd()) {
>> > >
>> > > I suspect this is wrong -- see comments on the commit message.
>>
>> I'm sorry, I pressed send too early, I was aiming for the draft folder.
>> So yes, based on that EFI that might be interruptible, let me try to
>> look at the initial issue again and maybe I get another idea how to deal
>> with this.
>> One question: If EFI is interruptible that means, we call into EFI - how
>> do we get out? Does EFI enable interrupts and the kernel receives an
>> interrupt and treats this EFI call like a regular context switch?
>
> AFAIK the only safe way to get out permanently is for the call to
> return.  Note, I've not gone through the spec in fine detail myself.
>
> The OS may handle interrupts occurring during the EFI call, but we
> still have to return to EFI afterwards to finish off the call.  From
> the Linux perspective, I think this means that EFI calls are non-
> preemptible.
>
> Under RT, I'm pretty sure that we can't safely resume the interrupted
> EFI call on a different cpu from the one it was interrupted on.  Even
> if it doesn't say this explicitly in the UEFI spec, I think it will be
> assumed in implementations.
>

This is a can of worms I would rather not open, although I don't think
the UEFI spec makes it explicit that you cannot migrate runtime
service calls while in progress.

Also, I don't think EFI calls are worth obsessing about, given that
they shouldn't be that common under normal operation. I know that RT
is not about the common case but about the worst case, though. What
problem is migrating a non-preemptible task intended to solve?

>
> Certain EFI calls are not long-running and may need to be called from
> interrupt context in Linux,

This suggests that the need to be called in interrupt context is a
property of the firmware implementation but it is not.

In the kernel, we have efi-pstore which may attempt to invoke runtime
services to record the kernel's dying gasp into a EFI variable. Other
than that, I don't think there are any reasons to call EFI services
from non-process context.

> which means that there may be live kernel-
> mode NEON state.  This is why there are separate FPSIMD/SVE percpu stash
> buffers for EFI specifically.
>

So given the above, and the fact that you can panic() in an interrupt
handler, we needed those buffers, although I wonder if we'd ever reach
the point where we end up resuming execution of the code that uses the
restored contents of those buffers.

> Does this make sense?  It is probably not very clear, but I'm trying
> to hide the fact that I haven't looked at the UEFI spec for ages...
>
> Cheers
> ---Dave
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
  2018-07-24 13:46                       ` Steven Rostedt
@ 2018-07-26 15:06                         ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-26 15:06 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Dave Martin, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but remains on the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.

Add preempt_disable()/enable() to enforce the required semantic on -RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
This should work. Compiling currently gcc-6 on the box to see what
happens. Since the crypto disables preemption "frequently" and I don't
expect or see anything to worry about.

 arch/arm64/kernel/fpsimd.c |   30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -157,6 +157,15 @@ static void sve_free(struct task_struct
 	__sve_free(task);
 }
 
+static void *sve_free_atomic(struct task_struct *task)
+{
+	void *sve_state = task->thread.sve_state;
+
+	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+	task->thread.sve_state = NULL;
+	return sve_state;
+}
 
 /* Offset of FFR in the SVE register dump */
 static size_t sve_ffr_offset(int vl)
@@ -594,6 +603,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		preempt_disable();
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +614,10 @@ int sve_set_vector_length(struct task_st
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
 		local_bh_enable();
+		preempt_enable();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,6 +849,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
+	preempt_disable();
 	local_bh_disable();
 
 	task_fpsimd_save();
@@ -850,6 +863,7 @@ asmlinkage void do_sve_acc(unsigned int
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -921,10 +935,12 @@ void fpsimd_thread_switch(struct task_st
 void fpsimd_flush_thread(void)
 {
 	int vl, supported_vl;
+	void *mem = NULL;
 
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
@@ -932,7 +948,7 @@ void fpsimd_flush_thread(void)
 
 	if (system_supports_sve()) {
 		clear_thread_flag(TIF_SVE);
-		sve_free(current);
+		mem = sve_free_atomic(current);
 
 		/*
 		 * Reset the task vector length as required.
@@ -968,6 +984,8 @@ void fpsimd_flush_thread(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
+	preempt_enable();
+	kfree(mem);
 }
 
 /*
@@ -979,9 +997,11 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 	task_fpsimd_save();
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -1021,6 +1041,7 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1029,6 +1050,7 @@ void fpsimd_restore_current_state(void)
 	}
 
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -1041,6 +1063,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
@@ -1053,6 +1076,7 @@ void fpsimd_update_current_state(struct
 		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -1115,6 +1139,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
+	preempt_disable();
 	local_bh_disable();
 
 	__this_cpu_write(kernel_neon_busy, true);
@@ -1131,6 +1156,7 @@ void kernel_neon_begin(void)
 	preempt_disable();
 
 	local_bh_enable();
+	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_neon_begin);
 

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
@ 2018-07-26 15:06                         ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-26 15:06 UTC (permalink / raw)
  To: linux-arm-kernel

In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but remains on the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.

Add preempt_disable()/enable() to enforce the required semantic on -RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
This should work. Compiling currently gcc-6 on the box to see what
happens. Since the crypto disables preemption "frequently" and I don't
expect or see anything to worry about.

 arch/arm64/kernel/fpsimd.c |   30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -157,6 +157,15 @@ static void sve_free(struct task_struct
 	__sve_free(task);
 }
 
+static void *sve_free_atomic(struct task_struct *task)
+{
+	void *sve_state = task->thread.sve_state;
+
+	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+	task->thread.sve_state = NULL;
+	return sve_state;
+}
 
 /* Offset of FFR in the SVE register dump */
 static size_t sve_ffr_offset(int vl)
@@ -594,6 +603,7 @@ int sve_set_vector_length(struct task_st
 	 * non-SVE thread.
 	 */
 	if (task == current) {
+		preempt_disable();
 		local_bh_disable();
 
 		task_fpsimd_save();
@@ -604,8 +614,10 @@ int sve_set_vector_length(struct task_st
 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 		sve_to_fpsimd(task);
 
-	if (task == current)
+	if (task == current) {
 		local_bh_enable();
+		preempt_enable();
+	}
 
 	/*
 	 * Force reallocation of task SVE state to the correct size
@@ -837,6 +849,7 @@ asmlinkage void do_sve_acc(unsigned int
 
 	sve_alloc(current);
 
+	preempt_disable();
 	local_bh_disable();
 
 	task_fpsimd_save();
@@ -850,6 +863,7 @@ asmlinkage void do_sve_acc(unsigned int
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -921,10 +935,12 @@ void fpsimd_thread_switch(struct task_st
 void fpsimd_flush_thread(void)
 {
 	int vl, supported_vl;
+	void *mem = NULL;
 
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
@@ -932,7 +948,7 @@ void fpsimd_flush_thread(void)
 
 	if (system_supports_sve()) {
 		clear_thread_flag(TIF_SVE);
-		sve_free(current);
+		mem = sve_free_atomic(current);
 
 		/*
 		 * Reset the task vector length as required.
@@ -968,6 +984,8 @@ void fpsimd_flush_thread(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
+	preempt_enable();
+	kfree(mem);
 }
 
 /*
@@ -979,9 +997,11 @@ void fpsimd_preserve_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 	task_fpsimd_save();
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -1021,6 +1041,7 @@ void fpsimd_restore_current_state(void)
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1029,6 +1050,7 @@ void fpsimd_restore_current_state(void)
 	}
 
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -1041,6 +1063,7 @@ void fpsimd_update_current_state(struct
 	if (!system_supports_fpsimd())
 		return;
 
+	preempt_disable();
 	local_bh_disable();
 
 	current->thread.fpsimd_state.user_fpsimd = *state;
@@ -1053,6 +1076,7 @@ void fpsimd_update_current_state(struct
 		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -1115,6 +1139,7 @@ void kernel_neon_begin(void)
 
 	BUG_ON(!may_use_simd());
 
+	preempt_disable();
 	local_bh_disable();
 
 	__this_cpu_write(kernel_neon_busy, true);
@@ -1131,6 +1156,7 @@ void kernel_neon_begin(void)
 	preempt_disable();
 
 	local_bh_enable();
+	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_neon_begin);
 

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
  2018-07-26 15:06                         ` Sebastian Andrzej Siewior
@ 2018-07-27  3:17                           ` Mike Galbraith
  -1 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-27  3:17 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Steven Rostedt
  Cc: Dave Martin, linux-rt-users, Catalin Marinas, Will Deacon,
	linux-kernel, tglx, linux-arm-kernel

On Thu, 2018-07-26 at 17:06 +0200, Sebastian Andrzej Siewior wrote:
> 
> @@ -1115,6 +1139,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> +	preempt_disable();
>  	local_bh_disable();
>  
>  	__this_cpu_write(kernel_neon_busy, true);
> @@ -1131,6 +1156,7 @@ void kernel_neon_begin(void)
>  	preempt_disable();

Nit: this preempt_disable() could be removed...
 
>  	local_bh_enable();
> +	preempt_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_begin);

...instead of adding this one.

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
@ 2018-07-27  3:17                           ` Mike Galbraith
  0 siblings, 0 replies; 69+ messages in thread
From: Mike Galbraith @ 2018-07-27  3:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 2018-07-26 at 17:06 +0200, Sebastian Andrzej Siewior wrote:
> 
> @@ -1115,6 +1139,7 @@ void kernel_neon_begin(void)
>  
>  	BUG_ON(!may_use_simd());
>  
> +	preempt_disable();
>  	local_bh_disable();
>  
>  	__this_cpu_write(kernel_neon_busy, true);
> @@ -1131,6 +1156,7 @@ void kernel_neon_begin(void)
>  	preempt_disable();

Nit: this preempt_disable() could be removed...
 
>  	local_bh_enable();
> +	preempt_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_begin);

...instead of adding this one.

	-Mike

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
  2018-07-27  3:17                           ` Mike Galbraith
@ 2018-07-27  7:56                             ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-27  7:56 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Steven Rostedt, Dave Martin, linux-rt-users, Catalin Marinas,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On 2018-07-27 05:17:23 [+0200], Mike Galbraith wrote:
> On Thu, 2018-07-26 at 17:06 +0200, Sebastian Andrzej Siewior wrote:
> > 
> > @@ -1115,6 +1139,7 @@ void kernel_neon_begin(void)
> >  
> >  	BUG_ON(!may_use_simd());
> >  
> > +	preempt_disable();
> >  	local_bh_disable();
> >  
> >  	__this_cpu_write(kernel_neon_busy, true);
> > @@ -1131,6 +1156,7 @@ void kernel_neon_begin(void)
> >  	preempt_disable();
> 
> Nit: this preempt_disable() could be removed...
>  
> >  	local_bh_enable();
> > +	preempt_enable();
> >  }
> >  EXPORT_SYMBOL(kernel_neon_begin);
> 
> ...instead of adding this one.

It could. I have currently no idea for the long term solution and this
keeps track what is intended to do. It might get replaced with
preempt_.*_rt()…

> 	-Mike

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
@ 2018-07-27  7:56                             ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-27  7:56 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-27 05:17:23 [+0200], Mike Galbraith wrote:
> On Thu, 2018-07-26 at 17:06 +0200, Sebastian Andrzej Siewior wrote:
> > 
> > @@ -1115,6 +1139,7 @@ void kernel_neon_begin(void)
> >  
> >  	BUG_ON(!may_use_simd());
> >  
> > +	preempt_disable();
> >  	local_bh_disable();
> >  
> >  	__this_cpu_write(kernel_neon_busy, true);
> > @@ -1131,6 +1156,7 @@ void kernel_neon_begin(void)
> >  	preempt_disable();
> 
> Nit: this preempt_disable() could be removed...
>  
> >  	local_bh_enable();
> > +	preempt_enable();
> >  }
> >  EXPORT_SYMBOL(kernel_neon_begin);
> 
> ...instead of adding this one.

It could. I have currently no idea for the long term solution and this
keeps track what is intended to do. It might get replaced with
preempt_.*_rt()…

> 	-Mike

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
  2018-07-26 15:06                         ` Sebastian Andrzej Siewior
@ 2018-07-27 15:35                           ` Dave Martin
  -1 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-07-27 15:35 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Steven Rostedt, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On Thu, Jul 26, 2018 at 05:06:34PM +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains on the same CPU. This may corrupt the
> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> 
> Add preempt_disable()/enable() to enforce the required semantic on -RT.

Does this supersede the local_lock based approach?

That would have been nice to have, but if there are open questions about
how to do it then I guess something like this patch makes sense as a
stopgap solution.

> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> This should work. Compiling currently gcc-6 on the box to see what
> happens. Since the crypto disables preemption "frequently" and I don't
> expect or see anything to worry about.
> 
>  arch/arm64/kernel/fpsimd.c |   30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -157,6 +157,15 @@ static void sve_free(struct task_struct
>  	__sve_free(task);
>  }
>  
> +static void *sve_free_atomic(struct task_struct *task)
> +{
> +	void *sve_state = task->thread.sve_state;
> +
> +	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
> +
> +	task->thread.sve_state = NULL;
> +	return sve_state;
> +}

This feels a bit excessive.  Since there's only one call site, I'd
prefer if the necessary code were simply inlined.  We wouldn't need the
WARN either in that case, since (IIUC) it's only there to check for
accidental misuse of this helper.

>  /* Offset of FFR in the SVE register dump */
>  static size_t sve_ffr_offset(int vl)
> @@ -594,6 +603,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		preempt_disable();
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +614,10 @@ int sve_set_vector_length(struct task_st
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
>  		local_bh_enable();
> +		preempt_enable();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,6 +849,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> +	preempt_disable();
>  	local_bh_disable();

I think we should have local helpers for the preempt+local_bh
maintenance, since they're needed all over the place in this
file.

[...]

Cheers
---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
@ 2018-07-27 15:35                           ` Dave Martin
  0 siblings, 0 replies; 69+ messages in thread
From: Dave Martin @ 2018-07-27 15:35 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jul 26, 2018 at 05:06:34PM +0200, Sebastian Andrzej Siewior wrote:
> In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> code disables BH and expects that it is not preemptible. On -RT the
> task remains preemptible but remains on the same CPU. This may corrupt the
> content of the SIMD registers if the task is preempted during
> saving/restoring those registers.
> 
> Add preempt_disable()/enable() to enforce the required semantic on -RT.

Does this supersede the local_lock based approach?

That would have been nice to have, but if there are open questions about
how to do it then I guess something like this patch makes sense as a
stopgap solution.

> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> This should work. Compiling currently gcc-6 on the box to see what
> happens. Since the crypto disables preemption "frequently" and I don't
> expect or see anything to worry about.
> 
>  arch/arm64/kernel/fpsimd.c |   30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
> 
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -157,6 +157,15 @@ static void sve_free(struct task_struct
>  	__sve_free(task);
>  }
>  
> +static void *sve_free_atomic(struct task_struct *task)
> +{
> +	void *sve_state = task->thread.sve_state;
> +
> +	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
> +
> +	task->thread.sve_state = NULL;
> +	return sve_state;
> +}

This feels a bit excessive.  Since there's only one call site, I'd
prefer if the necessary code were simply inlined.  We wouldn't need the
WARN either in that case, since (IIUC) it's only there to check for
accidental misuse of this helper.

>  /* Offset of FFR in the SVE register dump */
>  static size_t sve_ffr_offset(int vl)
> @@ -594,6 +603,7 @@ int sve_set_vector_length(struct task_st
>  	 * non-SVE thread.
>  	 */
>  	if (task == current) {
> +		preempt_disable();
>  		local_bh_disable();
>  
>  		task_fpsimd_save();
> @@ -604,8 +614,10 @@ int sve_set_vector_length(struct task_st
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
>  		sve_to_fpsimd(task);
>  
> -	if (task == current)
> +	if (task == current) {
>  		local_bh_enable();
> +		preempt_enable();
> +	}
>  
>  	/*
>  	 * Force reallocation of task SVE state to the correct size
> @@ -837,6 +849,7 @@ asmlinkage void do_sve_acc(unsigned int
>  
>  	sve_alloc(current);
>  
> +	preempt_disable();
>  	local_bh_disable();

I think we should have local helpers for the preempt+local_bh
maintenance, since they're needed all over the place in this
file.

[...]

Cheers
---Dave

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
  2018-07-27 15:35                           ` Dave Martin
@ 2018-07-27 16:26                             ` Sebastian Andrzej Siewior
  -1 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-27 16:26 UTC (permalink / raw)
  To: Dave Martin
  Cc: Steven Rostedt, linux-rt-users, Catalin Marinas, Mike Galbraith,
	Will Deacon, linux-kernel, tglx, linux-arm-kernel

On 2018-07-27 16:35:59 [+0100], Dave Martin wrote:
> On Thu, Jul 26, 2018 at 05:06:34PM +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains on the same CPU. This may corrupt the
> > content of the SIMD registers if the task is preempted during
> > saving/restoring those registers.
> > 
> > Add preempt_disable()/enable() to enforce the required semantic on -RT.
> 
> Does this supersede the local_lock based approach?

Yes, it does.

> That would have been nice to have, but if there are open questions about
> how to do it then I guess something like this patch makes sense as a
> stopgap solution.
> 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> > This should work. Compiling currently gcc-6 on the box to see what
> > happens. Since the crypto disables preemption "frequently" and I don't
> > expect or see anything to worry about.
> > 
> >  arch/arm64/kernel/fpsimd.c |   30 ++++++++++++++++++++++++++++--
> >  1 file changed, 28 insertions(+), 2 deletions(-)
> > 
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -157,6 +157,15 @@ static void sve_free(struct task_struct
> >  	__sve_free(task);
> >  }
> >  
> > +static void *sve_free_atomic(struct task_struct *task)
> > +{
> > +	void *sve_state = task->thread.sve_state;
> > +
> > +	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
> > +
> > +	task->thread.sve_state = NULL;
> > +	return sve_state;
> > +}
> 
> This feels a bit excessive.  Since there's only one call site, I'd
> prefer if the necessary code were simply inlined.  We wouldn't need the
> WARN either in that case, since (IIUC) it's only there to check for
> accidental misuse of this helper.
okay.

> >  /* Offset of FFR in the SVE register dump */
> >  static size_t sve_ffr_offset(int vl)
> I think we should have local helpers for the preempt+local_bh
> maintenance, since they're needed all over the place in this
> file.
okay.

> Cheers
> ---Dave

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [PATCH RT v3] arm64: fpsimd: use preemp_disable in addition to local_bh_disable()
@ 2018-07-27 16:26                             ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 69+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-07-27 16:26 UTC (permalink / raw)
  To: linux-arm-kernel

On 2018-07-27 16:35:59 [+0100], Dave Martin wrote:
> On Thu, Jul 26, 2018 at 05:06:34PM +0200, Sebastian Andrzej Siewior wrote:
> > In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
> > code disables BH and expects that it is not preemptible. On -RT the
> > task remains preemptible but remains on the same CPU. This may corrupt the
> > content of the SIMD registers if the task is preempted during
> > saving/restoring those registers.
> > 
> > Add preempt_disable()/enable() to enforce the required semantic on -RT.
> 
> Does this supersede the local_lock based approach?

Yes, it does.

> That would have been nice to have, but if there are open questions about
> how to do it then I guess something like this patch makes sense as a
> stopgap solution.
> 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> > This should work. Compiling currently gcc-6 on the box to see what
> > happens. Since the crypto disables preemption "frequently" and I don't
> > expect or see anything to worry about.
> > 
> >  arch/arm64/kernel/fpsimd.c |   30 ++++++++++++++++++++++++++++--
> >  1 file changed, 28 insertions(+), 2 deletions(-)
> > 
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -157,6 +157,15 @@ static void sve_free(struct task_struct
> >  	__sve_free(task);
> >  }
> >  
> > +static void *sve_free_atomic(struct task_struct *task)
> > +{
> > +	void *sve_state = task->thread.sve_state;
> > +
> > +	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
> > +
> > +	task->thread.sve_state = NULL;
> > +	return sve_state;
> > +}
> 
> This feels a bit excessive.  Since there's only one call site, I'd
> prefer if the necessary code were simply inlined.  We wouldn't need the
> WARN either in that case, since (IIUC) it's only there to check for
> accidental misuse of this helper.
okay.

> >  /* Offset of FFR in the SVE register dump */
> >  static size_t sve_ffr_offset(int vl)
…
> I think we should have local helpers for the preempt+local_bh
> maintenance, since they're needed all over the place in this
> file.
okay.

> Cheers
> ---Dave

Sebastian

^ permalink raw reply	[flat|nested] 69+ messages in thread

end of thread, other threads:[~2018-07-27 16:26 UTC | newest]

Thread overview: 69+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-17 12:40 [PATCH RT] arm64: fpsimd: use a local_lock() in addition to local_bh_disable() Sebastian Andrzej Siewior
2018-05-17 12:40 ` Sebastian Andrzej Siewior
2018-05-17 18:19 ` Dave Martin
2018-05-17 18:19   ` Dave Martin
2018-05-18 12:46   ` Dave Martin
2018-05-18 12:46     ` Dave Martin
2018-05-23 14:34     ` Sebastian Andrzej Siewior
2018-05-23 14:34       ` Sebastian Andrzej Siewior
2018-05-23 14:31   ` Sebastian Andrzej Siewior
2018-05-23 14:31     ` Sebastian Andrzej Siewior
2018-05-23 14:55     ` Dave Martin
2018-05-23 14:55       ` Dave Martin
2018-05-22 17:10 ` Steven Rostedt
2018-05-22 17:10   ` Steven Rostedt
2018-05-22 17:21   ` Sebastian Andrzej Siewior
2018-05-22 17:21     ` Sebastian Andrzej Siewior
2018-05-22 17:24     ` Steven Rostedt
2018-05-22 17:24       ` Steven Rostedt
2018-05-22 17:33       ` Sebastian Andrzej Siewior
2018-05-22 17:33         ` Sebastian Andrzej Siewior
2018-07-11 13:25         ` Steven Rostedt
2018-07-11 13:25           ` Steven Rostedt
2018-07-11 13:31           ` Sebastian Andrzej Siewior
2018-07-11 13:31             ` Sebastian Andrzej Siewior
2018-07-11 13:33             ` Steven Rostedt
2018-07-11 13:33               ` Steven Rostedt
2018-07-13 17:49               ` [PATCH RT v2] " Sebastian Andrzej Siewior
2018-07-13 17:49                 ` Sebastian Andrzej Siewior
2018-07-13 17:50                 ` [PATCH RT] locallock: add local_lock_bh() Sebastian Andrzej Siewior
2018-07-13 17:50                   ` Sebastian Andrzej Siewior
2018-07-13 22:03                 ` [PATCH RT v2] arm64: fpsimd: use a local_lock() in addition to local_bh_disable() Mike Galbraith
2018-07-13 22:03                   ` Mike Galbraith
2018-07-15  7:22                   ` Mike Galbraith
2018-07-15  7:22                     ` Mike Galbraith
2018-07-15  7:22                     ` Mike Galbraith
2018-07-18 10:30                     ` Mike Galbraith
2018-07-18 10:30                       ` Mike Galbraith
2018-07-18 10:30                       ` Mike Galbraith
2018-07-18  9:27                   ` Sebastian Andrzej Siewior
2018-07-18  9:27                     ` Sebastian Andrzej Siewior
2018-07-18 10:28                     ` Mike Galbraith
2018-07-18 10:28                       ` Mike Galbraith
2018-07-18 10:28                       ` Mike Galbraith
2018-07-18 10:36                       ` Sebastian Andrzej Siewior
2018-07-18 10:36                         ` Sebastian Andrzej Siewior
2018-07-16 15:17                 ` Dave Martin
2018-07-16 15:17                   ` Dave Martin
2018-07-18  9:12                   ` Sebastian Andrzej Siewior
2018-07-18  9:24                     ` Sebastian Andrzej Siewior
2018-07-18  9:24                       ` Sebastian Andrzej Siewior
2018-07-24 14:45                       ` Dave Martin
2018-07-24 14:45                         ` Dave Martin
2018-07-24 15:15                         ` Ard Biesheuvel
2018-07-24 15:15                           ` Ard Biesheuvel
2018-07-24 13:46                     ` Steven Rostedt
2018-07-24 13:46                       ` Steven Rostedt
2018-07-24 13:57                       ` Sebastian Andrzej Siewior
2018-07-24 13:57                         ` Sebastian Andrzej Siewior
2018-07-26 15:06                       ` [PATCH RT v3] arm64: fpsimd: use preemp_disable " Sebastian Andrzej Siewior
2018-07-26 15:06                         ` Sebastian Andrzej Siewior
2018-07-27  3:17                         ` Mike Galbraith
2018-07-27  3:17                           ` Mike Galbraith
2018-07-27  7:56                           ` Sebastian Andrzej Siewior
2018-07-27  7:56                             ` Sebastian Andrzej Siewior
2018-07-27 15:35                         ` Dave Martin
2018-07-27 15:35                           ` Dave Martin
2018-07-27 16:26                           ` Sebastian Andrzej Siewior
2018-07-27 16:26                             ` Sebastian Andrzej Siewior
2018-07-11 17:07             ` [PATCH RT] arm64: fpsimd: use a local_lock() " Mike Galbraith

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.