All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] mips: Add MXU context switching support
@ 2016-06-25 12:14 PrasannaKumar Muralidharan
  2016-07-03 13:17 ` PrasannaKumar Muralidharan
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: PrasannaKumar Muralidharan @ 2016-06-25 12:14 UTC (permalink / raw)
  To: linux-mips
  Cc: linux-kernel, adobriyan, john.stultz, mguzik, athorlton, mhocko,
	ebiederm, gorcunov, luto, cl, serge.hallyn, keescook, jslaby,
	akpm, macro, f.fainelli, mingo, alex.smith, markos.chandras,
	Leonid.Yegoshin, david.daney, zhaoxiu.zeng, chenhc,
	Zubair.Kakakhel, james.hogan, paul.burton, ralf,
	PrasannaKumar Muralidharan

From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>

This patch adds support for context switching Xburst MXU registers. The
registers are named xr0 to xr16. xr16 is the control register that can
be used to enable and disable MXU instruction set. Read and write to
these registers can be done without enabling MXU instruction set by user
space. Only when MXU instruction set is enabled any MXU instruction
(other than read or write to xr registers) can be done. xr0 is always 0.

Kernel does not know when MXU instruction is enabled or disabled. So
during context switch if MXU is enabled in xr16 register then MXU
registers are saved, restored when the task is run. When user space
application enables MXU, it is not reflected in other threads
immediately. So for convenience the applications can use prctl syscall
to let the MXU state propagate across threads running in different CPUs.

Signed-off-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
---
 arch/mips/include/asm/cpu-features.h |   4 +
 arch/mips/include/asm/cpu.h          |   1 +
 arch/mips/include/asm/mxu.h          | 157 +++++++++++++++++++++++++++++++++++
 arch/mips/include/asm/processor.h    |  19 +++++
 arch/mips/include/asm/switch_to.h    |  40 +++++++++
 arch/mips/kernel/cpu-probe.c         |   1 +
 arch/mips/kernel/process.c           |  48 +++++++++++
 include/uapi/linux/prctl.h           |   3 +
 kernel/sys.c                         |   6 ++
 9 files changed, 279 insertions(+)
 create mode 100644 arch/mips/include/asm/mxu.h

diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index e961c8a..c6270b0 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -345,6 +345,10 @@
 #define cpu_has_dsp3		(cpu_data[0].ases & MIPS_ASE_DSP3)
 #endif
 
+#ifndef cpu_has_mxu
+#define cpu_has_mxu		(cpu_data[0].ases & MIPS_ASE_MXU)
+#endif
+
 #ifndef cpu_has_mipsmt
 #define cpu_has_mipsmt		(cpu_data[0].ases & MIPS_ASE_MIPSMT)
 #endif
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index f672df8..77ab797 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -428,5 +428,6 @@ enum cpu_type_enum {
 #define MIPS_ASE_VZ		0x00000080 /* Virtualization ASE */
 #define MIPS_ASE_MSA		0x00000100 /* MIPS SIMD Architecture */
 #define MIPS_ASE_DSP3		0x00000200 /* Signal Processing ASE Rev 3*/
+#define MIPS_ASE_MXU		0x00000400 /* Xburst MXU instruction set */
 
 #endif /* _ASM_CPU_H */
diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
new file mode 100644
index 0000000..cf77cbd
--- /dev/null
+++ b/arch/mips/include/asm/mxu.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) Ingenic Semiconductor
+ * File taken from Ingenic Semiconductor's linux repo available in github
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#ifndef _ASM_MXU_H
+#define _ASM_MXU_H
+
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/hazards.h>
+#include <asm/mipsregs.h>
+
+static inline void __enable_mxu(void)
+{
+	unsigned int register val asm("t0");
+	val = 3;
+	asm volatile(".word	0x7008042f\n\t"::"r"(val));
+}
+
+static inline void enable_mxu(void)
+{
+	if (cpu_has_mxu)
+		__enable_mxu();
+}
+
+static inline void disable_mxu(void)
+{
+	unsigned int register val asm("t0");
+	val = 0;
+	asm volatile(".word	0x7008042f\n\t"::"r"(val));
+	asm("nop\n\t");
+	asm("nop\n\t");
+	asm("nop\n\t");
+}
+
+static inline int mxu_used(void)
+{
+	unsigned int register reg_val asm("t0");
+	asm volatile(".word	0x7008042f\n\t"::"r"(reg_val));
+	return reg_val &0x01;
+}
+
+static inline void __save_mxu(void *tsk_void)
+{
+	struct task_struct *tsk = tsk_void;
+
+	register unsigned int reg_val asm("t0");
+
+	asm volatile(".word	0x7008042e\n\t");
+	tsk->thread.mxu.xr[0] = reg_val;
+	asm volatile(".word	0x7008006e\n\t");
+	tsk->thread.mxu.xr[1] = reg_val;
+	asm volatile(".word	0x700800ae\n\t");
+	tsk->thread.mxu.xr[2] = reg_val;
+	asm volatile(".word	0x700800ee\n\t");
+	tsk->thread.mxu.xr[3] = reg_val;
+	asm volatile(".word	0x7008012e\n\t");
+	tsk->thread.mxu.xr[4] = reg_val;
+	asm volatile(".word	0x7008016e\n\t");
+	tsk->thread.mxu.xr[5] = reg_val;
+	asm volatile(".word	0x700801ae\n\t");
+	tsk->thread.mxu.xr[6] = reg_val;
+	asm volatile(".word	0x700801ee\n\t");
+	tsk->thread.mxu.xr[7] = reg_val;
+	asm volatile(".word	0x7008022e\n\t");
+	tsk->thread.mxu.xr[8] = reg_val;
+	asm volatile(".word	0x7008026e\n\t");
+	tsk->thread.mxu.xr[9] = reg_val;
+	asm volatile(".word	0x700802ae\n\t");
+	tsk->thread.mxu.xr[10] = reg_val;
+	asm volatile(".word	0x700802ee\n\t");
+	tsk->thread.mxu.xr[11] = reg_val;
+	asm volatile(".word	0x7008032e\n\t");
+	tsk->thread.mxu.xr[12] = reg_val;
+	asm volatile(".word	0x7008036e\n\t");
+	tsk->thread.mxu.xr[13] = reg_val;
+	asm volatile(".word	0x700803ae\n\t");
+	tsk->thread.mxu.xr[14] = reg_val;
+	asm volatile(".word	0x700803ee\n\t");
+	tsk->thread.mxu.xr[15] = reg_val;
+}
+
+static inline void __restore_mxu(void *tsk_void)
+{
+	struct task_struct *tsk = tsk_void;
+
+	register unsigned int reg_val asm("t0");
+
+	reg_val = tsk->thread.mxu.xr[0];
+	asm volatile(".word	0x7008042f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[1];
+	asm volatile(".word	0x7008006f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[2];
+	asm volatile(".word	0x700800af\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[3];
+	asm volatile(".word	0x700800ef\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[4];
+	asm volatile(".word	0x7008012f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[5];
+	asm volatile(".word	0x7008016f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[6];
+	asm volatile(".word	0x700801af\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[7];
+	asm volatile(".word	0x700801ef\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[8];
+	asm volatile(".word	0x7008022f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[9];
+	asm volatile(".word	0x7008026f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[10];
+	asm volatile(".word	0x700802af\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[11];
+	asm volatile(".word	0x700802ef\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[12];
+	asm volatile(".word	0x7008032f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[13];
+	asm volatile(".word	0x7008036f\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[14];
+	asm volatile(".word	0x700803af\n\t"::"r"(reg_val));
+	reg_val = tsk->thread.mxu.xr[15];
+	asm volatile(".word	0x700803ef\n\t"::"r"(reg_val));
+}
+
+#define save_mxu(tsk)						\
+	do {							\
+		if (cpu_has_mxu)				\
+			__save_mxu(tsk);			\
+	} while (0)
+
+#define restore_mxu(tsk)					\
+	do {							\
+		if (cpu_has_mxu)				\
+			__restore_mxu(tsk);			\
+	} while (0)
+
+#define __get_mxu_regs(tsk)					\
+	({							\
+		if (tsk == current)				\
+			__save_mxu(current);			\
+								\
+		tsk->thread.mxu.xr;				\
+	})
+
+#define __let_mxu_regs(tsk, regs)				\
+	do {							\
+		int i;						\
+		for (i = 0; i < NUM_MXU_REGS; i++)		\
+			tsk->thread.mxu.xr[i] = regs[i];	\
+		if (tsk == current)				\
+			__save_mxu(current);			\
+	} while (0)
+
+#endif /* _ASM_MXU_H */
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 7e78b62..a4def30 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -142,6 +142,11 @@ struct mips_dsp_state {
 	unsigned int	dspcontrol;
 };
 
+#define NUM_MXU_REGS 16
+struct xburst_mxu_state {
+	unsigned int xr[NUM_MXU_REGS];
+};
+
 #define INIT_CPUMASK { \
 	{0,} \
 }
@@ -266,6 +271,10 @@ struct thread_struct {
 	/* Saved state of the DSP ASE, if available. */
 	struct mips_dsp_state dsp;
 
+	unsigned int mxu_used;
+	/* Saved registers of Xburst MXU, if available. */
+	struct xburst_mxu_state mxu;
+
 	/* Saved watch register state, if available. */
 	union mips_watch_reg_state watch;
 
@@ -330,6 +339,10 @@ struct thread_struct {
 		.dspr		= {0, },			\
 		.dspcontrol	= 0,				\
 	},							\
+	.mxu_used		= 0,				\
+	.mxu			= {				\
+		.xr		= {0, },			\
+	},							\
 	/*							\
 	 * saved watch register stuff				\
 	 */							\
@@ -410,4 +423,10 @@ extern int mips_set_process_fp_mode(struct task_struct *task,
 #define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
 #define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
 
+extern int mips_enable_mxu_other_cpus(void);
+extern int mips_disable_mxu_other_cpus(void);
+
+#define ENABLE_MXU_OTHER_CPUS()         mips_enable_mxu_other_cpus()
+#define DISABLE_MXU_OTHER_CPUS()        mips_disable_mxu_other_cpus()
+
 #endif /* _ASM_PROCESSOR_H */
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index ebb5c0f..112aff5 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -75,6 +75,43 @@ do {	if (cpu_has_rw_llb) {						\
 	}								\
 } while (0)
 
+static inline void handle_mxu_state(struct task_struct *prev,
+		struct task_struct *next)
+{
+	struct task_struct *thread = NULL;
+
+	if (prev->thread.mxu_used) {
+		if (mxu_used() == 1) {
+			__save_mxu(prev);
+		} else {
+			prev->thread.mxu_used = 0;
+			thread = prev;
+			rcu_read_lock();
+			while_each_thread(prev, thread) {
+				thread->thread.mxu_used = 0;
+			};
+			rcu_read_unlock();
+		}
+	} else {
+		if (mxu_used() == 1) {
+			__save_mxu(prev);
+			prev->thread.mxu_used = 1;
+			thread = prev;
+			rcu_read_lock();
+			while_each_thread(prev, thread) {
+				thread->thread.mxu_used = 1;
+			};
+			rcu_read_unlock();
+		}
+	}
+	disable_mxu();
+
+	if (next->thread.mxu_used) {
+		__restore_mxu(next);
+		enable_mxu();
+	}
+}
+
 /*
  * For newly created kernel threads switch_to() will return to
  * ret_from_kernel_thread, newly created user threads to ret_from_fork.
@@ -89,6 +126,9 @@ do {									\
 		__save_dsp(prev);					\
 		__restore_dsp(next);					\
 	}								\
+	if (cpu_has_mxu) {						\
+		handle_mxu_state(prev, next);				\
+	}								\
 	if (cop2_present) {						\
 		set_c0_status(ST0_CU2);					\
 		if ((KSTK_STATUS(prev) & ST0_CU2)) {			\
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index a88d442..90ffc40 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1836,6 +1836,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_JZRISC:
 		c->cputype = CPU_JZRISC;
 		c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+		c->ases |= MIPS_ASE_MXU;
 		__cpu_name[cpu] = "Ingenic JZRISC";
 		break;
 	default:
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 813ed78..6310092 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -31,6 +31,7 @@
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
 #include <asm/dsp.h>
+#include <asm/mxu.h>
 #include <asm/fpu.h>
 #include <asm/msa.h>
 #include <asm/pgtable.h>
@@ -90,6 +91,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 		_save_fp(current);
 
 	save_dsp(current);
+	if (src->thread.mxu_used) {
+		save_mxu(src);
+		/* Disable mxu usage for new process */
+		dst->thread.mxu_used = 0;
+	}
 
 	preempt_enable();
 
@@ -162,6 +168,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	if (clone_flags & CLONE_SETTLS)
 		ti->tp_value = regs->regs[7];
 
+	if (clone_flags & CLONE_THREAD)
+		p->thread.mxu_used = current->thread.mxu_used;
+
 	return 0;
 }
 
@@ -652,3 +661,42 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 
 	return 0;
 }
+
+int mips_change_mxu_state_other_cpus(int mxu_enable_state)
+{
+	if (!cpu_has_mxu)
+		return 0;
+
+	/*
+	 * Deschedule tasks from same thread group running in other CPUs. When
+	 * they are scheduled back they will have MXU enabled.
+	 */
+	if ((mxu_used() == mxu_enable_state) && (num_online_cpus() > 1)) {
+		struct task_struct *thread = current;
+		int this_cpu = task_cpu(current);
+		int cpu;
+
+		while_each_thread(current, thread) {
+			if (current != thread &&
+					thread->state == TASK_RUNNING) {
+				cpu = task_cpu(thread);
+				if (this_cpu != cpu && cpu_online(cpu))
+					smp_send_reschedule(task_cpu(thread));
+			}
+		};
+
+		schedule();
+	}
+
+	return 0;
+}
+
+int mips_enable_mxu_other_cpus(void)
+{
+	return mips_change_mxu_state_other_cpus(1);
+}
+
+int mips_disable_mxu_other_cpus(void)
+{
+	return mips_change_mxu_state_other_cpus(0);
+}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759..b193d91 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,7 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+#define PR_ENABLE_MXU_OTHER_CPUS        48
+#define PR_DISABLE_MXU_OTHER_CPUS       49
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be4..fbbc7b2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2270,6 +2270,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_FP_MODE:
 		error = GET_FP_MODE(me);
 		break;
+	case PR_ENABLE_MXU_OTHER_CPUS:
+		error = ENABLE_MXU_OTHER_CPUS();
+		break;
+	case PR_DISABLE_MXU_OTHER_CPUS:
+		error = DISABLE_MXU_OTHER_CPUS();
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
  2016-06-25 12:14 [RFC] mips: Add MXU context switching support PrasannaKumar Muralidharan
@ 2016-07-03 13:17 ` PrasannaKumar Muralidharan
  2016-07-04 21:45 ` Florian Fainelli
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: PrasannaKumar Muralidharan @ 2016-07-03 13:17 UTC (permalink / raw)
  To: linux-mips
  Cc: linux-kernel, Alexey Dobriyan, John Stultz, mguzik, athorlton,
	mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn, Kees Cook,
	jslaby, akpm, macro, Florian Fainelli, mingo, alex.smith,
	markos.chandras, Leonid.Yegoshin, david.daney, zhaoxiu.zeng,
	chenhc, Zubair.Kakakhel, james.hogan, Paul Burton, ralf,
	PrasannaKumar Muralidharan

Will be great if someone could review this. Thanks in advance.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
  2016-06-25 12:14 [RFC] mips: Add MXU context switching support PrasannaKumar Muralidharan
  2016-07-03 13:17 ` PrasannaKumar Muralidharan
@ 2016-07-04 21:45 ` Florian Fainelli
  2016-07-05  9:47   ` PrasannaKumar Muralidharan
  2016-07-04 22:30   ` Maciej W. Rozycki
  2016-07-05  9:34   ` Paul Burton
  3 siblings, 1 reply; 13+ messages in thread
From: Florian Fainelli @ 2016-07-04 21:45 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan, linux-mips
  Cc: linux-kernel, adobriyan, john.stultz, mguzik, athorlton, mhocko,
	ebiederm, gorcunov, luto, cl, serge.hallyn, keescook, jslaby,
	akpm, macro, mingo, alex.smith, markos.chandras, Leonid.Yegoshin,
	david.daney, zhaoxiu.zeng, chenhc, Zubair.Kakakhel, james.hogan,
	paul.burton, ralf

Le 25/06/2016 05:14, PrasannaKumar Muralidharan a écrit :
> From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
> 
> This patch adds support for context switching Xburst MXU registers. The
> registers are named xr0 to xr16. xr16 is the control register that can
> be used to enable and disable MXU instruction set. Read and write to
> these registers can be done without enabling MXU instruction set by user
> space. Only when MXU instruction set is enabled any MXU instruction
> (other than read or write to xr registers) can be done. xr0 is always 0.
> 
> Kernel does not know when MXU instruction is enabled or disabled. So
> during context switch if MXU is enabled in xr16 register then MXU
> registers are saved, restored when the task is run. When user space
> application enables MXU, it is not reflected in other threads
> immediately. So for convenience the applications can use prctl syscall
> to let the MXU state propagate across threads running in different CPUs.

This is all well and good and seems useful, but you have not stated why
this is even useful in the first place?

> 
> Signed-off-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
> ---

[snip]

> diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
> index 7e78b62..a4def30 100644
> --- a/arch/mips/include/asm/processor.h
> +++ b/arch/mips/include/asm/processor.h
> @@ -142,6 +142,11 @@ struct mips_dsp_state {
>  	unsigned int	dspcontrol;
>  };
>  
> +#define NUM_MXU_REGS 16
> +struct xburst_mxu_state {
> +	unsigned int xr[NUM_MXU_REGS];
> +};
> +
>  #define INIT_CPUMASK { \
>  	{0,} \
>  }
> @@ -266,6 +271,10 @@ struct thread_struct {
>  	/* Saved state of the DSP ASE, if available. */
>  	struct mips_dsp_state dsp;
>  
> +	unsigned int mxu_used;
> +	/* Saved registers of Xburst MXU, if available. */
> +	struct xburst_mxu_state mxu;

That's adding about 17 * 4 bytes to a structure that is probably best
kept as small as possible, might be worth adding an ifdef here specific
to the Ingenic platforms w/ MXU?

> +
>  	/* Saved watch register state, if available. */
>  	union mips_watch_reg_state watch;
>  
> @@ -330,6 +339,10 @@ struct thread_struct {
>  		.dspr		= {0, },			\
>  		.dspcontrol	= 0,				\
>  	},							\
> +	.mxu_used		= 0,				\
> +	.mxu			= {				\
> +		.xr		= {0, },			\
> +	},							\
>  	/*							\
>  	 * saved watch register stuff				\
>  	 */							\
> @@ -410,4 +423,10 @@ extern int mips_set_process_fp_mode(struct task_struct *task,
>  #define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
>  #define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
>  
> +extern int mips_enable_mxu_other_cpus(void);
> +extern int mips_disable_mxu_other_cpus(void);
> +
> +#define ENABLE_MXU_OTHER_CPUS()         mips_enable_mxu_other_cpus()
> +#define DISABLE_MXU_OTHER_CPUS()        mips_disable_mxu_other_cpus()

Where is the stub when building for !MIPS? Have you build a kernel with
your changes for e.g: x86?

[snip]

> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index a8d0759..b193d91 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -197,4 +197,7 @@ struct prctl_mm_map {
>  # define PR_CAP_AMBIENT_LOWER		3
>  # define PR_CAP_AMBIENT_CLEAR_ALL	4
>  
> +#define PR_ENABLE_MXU_OTHER_CPUS        48
> +#define PR_DISABLE_MXU_OTHER_CPUS       49
> +
>  #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 89d5be4..fbbc7b2 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2270,6 +2270,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>  	case PR_GET_FP_MODE:
>  		error = GET_FP_MODE(me);
>  		break;
> +	case PR_ENABLE_MXU_OTHER_CPUS:
> +		error = ENABLE_MXU_OTHER_CPUS();
> +		break;
> +	case PR_DISABLE_MXU_OTHER_CPUS:
> +		error = DISABLE_MXU_OTHER_CPUS();
> +		break;

Is not there a way to call into an architecture specific prctl() for the
unhandled options passed to prctl()? Not everybody will
want/implement/support that, and, more importantly changing generic code
here looks fishy and probably the wrong abstraction.
-- 
Florian

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
@ 2016-07-04 22:30   ` Maciej W. Rozycki
  0 siblings, 0 replies; 13+ messages in thread
From: Maciej W. Rozycki @ 2016-07-04 22:30 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan
  Cc: linux-mips, linux-kernel, adobriyan, john.stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	keescook, jslaby, Andrew Morton, f.fainelli, mingo, alex.smith,
	markos.chandras, Leonid Yegoshin, david.daney, zhaoxiu.zeng,
	chenhc, Zubair.Kakakhel, James Hogan, paul.burton, Ralf Baechle

On Sat, 25 Jun 2016, PrasannaKumar Muralidharan wrote:

> diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
> new file mode 100644
> index 0000000..cf77cbd
> --- /dev/null
> +++ b/arch/mips/include/asm/mxu.h
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright (C) Ingenic Semiconductor
> + * File taken from Ingenic Semiconductor's linux repo available in github
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + */
> +#ifndef _ASM_MXU_H
> +#define _ASM_MXU_H
> +
> +#include <asm/cpu.h>
> +#include <asm/cpu-features.h>
> +#include <asm/hazards.h>
> +#include <asm/mipsregs.h>
> +
> +static inline void __enable_mxu(void)
> +{
> +	unsigned int register val asm("t0");
> +	val = 3;
> +	asm volatile(".word	0x7008042f\n\t"::"r"(val));

 Can you please document your manually generated machine code, i.e. what 
instruction 0x7008042f actually is?

 Also our convention has been to separate asm operands with spaces, and 
there's no need for a new line or a tab character at the end of an 
inline as GCC will add these automatically as needed, i.e.:

	asm volatile(".word	0x7008042f" : : "r" (val));

Likewise throughout.

> +static inline void __save_mxu(void *tsk_void)
> +{
> +	struct task_struct *tsk = tsk_void;
> +
> +	register unsigned int reg_val asm("t0");
> +
> +	asm volatile(".word	0x7008042e\n\t");
> +	tsk->thread.mxu.xr[0] = reg_val;
> +	asm volatile(".word	0x7008006e\n\t");
> +	tsk->thread.mxu.xr[1] = reg_val;
> +	asm volatile(".word	0x700800ae\n\t");
> +	tsk->thread.mxu.xr[2] = reg_val;
> +	asm volatile(".word	0x700800ee\n\t");
> +	tsk->thread.mxu.xr[3] = reg_val;
> +	asm volatile(".word	0x7008012e\n\t");
> +	tsk->thread.mxu.xr[4] = reg_val;
> +	asm volatile(".word	0x7008016e\n\t");
> +	tsk->thread.mxu.xr[5] = reg_val;
> +	asm volatile(".word	0x700801ae\n\t");
> +	tsk->thread.mxu.xr[6] = reg_val;
> +	asm volatile(".word	0x700801ee\n\t");
> +	tsk->thread.mxu.xr[7] = reg_val;
> +	asm volatile(".word	0x7008022e\n\t");
> +	tsk->thread.mxu.xr[8] = reg_val;
> +	asm volatile(".word	0x7008026e\n\t");
> +	tsk->thread.mxu.xr[9] = reg_val;
> +	asm volatile(".word	0x700802ae\n\t");
> +	tsk->thread.mxu.xr[10] = reg_val;
> +	asm volatile(".word	0x700802ee\n\t");
> +	tsk->thread.mxu.xr[11] = reg_val;
> +	asm volatile(".word	0x7008032e\n\t");
> +	tsk->thread.mxu.xr[12] = reg_val;
> +	asm volatile(".word	0x7008036e\n\t");
> +	tsk->thread.mxu.xr[13] = reg_val;
> +	asm volatile(".word	0x700803ae\n\t");
> +	tsk->thread.mxu.xr[14] = reg_val;
> +	asm volatile(".word	0x700803ee\n\t");
> +	tsk->thread.mxu.xr[15] = reg_val;
> +}

 Not using an output operand with asms here?

  Maciej

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
@ 2016-07-04 22:30   ` Maciej W. Rozycki
  0 siblings, 0 replies; 13+ messages in thread
From: Maciej W. Rozycki @ 2016-07-04 22:30 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan
  Cc: linux-mips, linux-kernel, adobriyan, john.stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	keescook, jslaby, Andrew Morton, f.fainelli, mingo, alex.smith,
	markos.chandras, Leonid Yegoshin, david.daney, zhaoxiu.zeng,
	chenhc, Zubair.Kakakhel, James Hogan, paul.burton, Ralf Baechle

On Sat, 25 Jun 2016, PrasannaKumar Muralidharan wrote:

> diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
> new file mode 100644
> index 0000000..cf77cbd
> --- /dev/null
> +++ b/arch/mips/include/asm/mxu.h
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright (C) Ingenic Semiconductor
> + * File taken from Ingenic Semiconductor's linux repo available in github
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + */
> +#ifndef _ASM_MXU_H
> +#define _ASM_MXU_H
> +
> +#include <asm/cpu.h>
> +#include <asm/cpu-features.h>
> +#include <asm/hazards.h>
> +#include <asm/mipsregs.h>
> +
> +static inline void __enable_mxu(void)
> +{
> +	unsigned int register val asm("t0");
> +	val = 3;
> +	asm volatile(".word	0x7008042f\n\t"::"r"(val));

 Can you please document your manually generated machine code, i.e. what 
instruction 0x7008042f actually is?

 Also our convention has been to separate asm operands with spaces, and 
there's no need for a new line or a tab character at the end of an 
inline as GCC will add these automatically as needed, i.e.:

	asm volatile(".word	0x7008042f" : : "r" (val));

Likewise throughout.

> +static inline void __save_mxu(void *tsk_void)
> +{
> +	struct task_struct *tsk = tsk_void;
> +
> +	register unsigned int reg_val asm("t0");
> +
> +	asm volatile(".word	0x7008042e\n\t");
> +	tsk->thread.mxu.xr[0] = reg_val;
> +	asm volatile(".word	0x7008006e\n\t");
> +	tsk->thread.mxu.xr[1] = reg_val;
> +	asm volatile(".word	0x700800ae\n\t");
> +	tsk->thread.mxu.xr[2] = reg_val;
> +	asm volatile(".word	0x700800ee\n\t");
> +	tsk->thread.mxu.xr[3] = reg_val;
> +	asm volatile(".word	0x7008012e\n\t");
> +	tsk->thread.mxu.xr[4] = reg_val;
> +	asm volatile(".word	0x7008016e\n\t");
> +	tsk->thread.mxu.xr[5] = reg_val;
> +	asm volatile(".word	0x700801ae\n\t");
> +	tsk->thread.mxu.xr[6] = reg_val;
> +	asm volatile(".word	0x700801ee\n\t");
> +	tsk->thread.mxu.xr[7] = reg_val;
> +	asm volatile(".word	0x7008022e\n\t");
> +	tsk->thread.mxu.xr[8] = reg_val;
> +	asm volatile(".word	0x7008026e\n\t");
> +	tsk->thread.mxu.xr[9] = reg_val;
> +	asm volatile(".word	0x700802ae\n\t");
> +	tsk->thread.mxu.xr[10] = reg_val;
> +	asm volatile(".word	0x700802ee\n\t");
> +	tsk->thread.mxu.xr[11] = reg_val;
> +	asm volatile(".word	0x7008032e\n\t");
> +	tsk->thread.mxu.xr[12] = reg_val;
> +	asm volatile(".word	0x7008036e\n\t");
> +	tsk->thread.mxu.xr[13] = reg_val;
> +	asm volatile(".word	0x700803ae\n\t");
> +	tsk->thread.mxu.xr[14] = reg_val;
> +	asm volatile(".word	0x700803ee\n\t");
> +	tsk->thread.mxu.xr[15] = reg_val;
> +}

 Not using an output operand with asms here?

  Maciej

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
@ 2016-07-05  9:34   ` Paul Burton
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Burton @ 2016-07-05  9:34 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan
  Cc: linux-mips, linux-kernel, adobriyan, john.stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	keescook, jslaby, akpm, macro, f.fainelli, mingo, alex.smith,
	markos.chandras, Leonid.Yegoshin, david.daney, zhaoxiu.zeng,
	chenhc, Zubair.Kakakhel, james.hogan, ralf

Hi PrasannaKumar,

On 25/06/16 13:14, PrasannaKumar Muralidharan wrote:
> From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
>
> This patch adds support for context switching Xburst MXU registers. The
> registers are named xr0 to xr16. xr16 is the control register that can
> be used to enable and disable MXU instruction set. Read and write to
> these registers can be done without enabling MXU instruction set by user
> space. Only when MXU instruction set is enabled any MXU instruction
> (other than read or write to xr registers) can be done. xr0 is always 0.

Do you have any examples of userland programs making use of MXU? They 
would be useful in allowing people to test this patch.

How have you tested this?

> Kernel does not know when MXU instruction is enabled or disabled. So
> during context switch if MXU is enabled in xr16 register then MXU
> registers are saved, restored when the task is run.

I'm not convinced this is the right way to go. It seems complex & 
fragile vs the alternatives, the simplest of which could be to just 
always save & restore MXU context in kernels with MXU support. Is there 
a significant performance cost to just unconditionally saving & 
restoring the MXU context? That is after all what Ingenic's vendor 
kernel, which it looks like large parts of your patch are taken from, does.

> When user space
> application enables MXU, it is not reflected in other threads
> immediately. So for convenience the applications can use prctl syscall
> to let the MXU state propagate across threads running in different CPUs.

Surely it wouldn't be reflected at all, since each thread has its own 
MXU context? Would you expect applications to actually want to enable 
MXU on one thread & make use of it from other already running threads? 
Off the top of my head I can't think of why that would be useful, so I'm 
wondering whether it would be better to just let each thread handle 
enabling MXU if it wants & leave the kernel out of it. If we just save & 
restore unconditionally then this becomes a non-issue anyway.

> Signed-off-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>optimise out
> ---
>  arch/mips/include/asm/cpu-features.h |   4 +
>  arch/mips/include/asm/cpu.h          |   1 +
>  arch/mips/include/asm/mxu.h          | 157 +++++++++++++++++++++++++++++++++++
>  arch/mips/include/asm/processor.h    |  19 +++++
>  arch/mips/include/asm/switch_to.h    |  40 +++++++++
>  arch/mips/kernel/cpu-probe.c         |   1 +
>  arch/mips/kernel/process.c           |  48 +++++++++++
>  include/uapi/linux/prctl.h           |   3 +
>  kernel/sys.c                         |   6 ++
>  9 files changed, 279 insertions(+)
>  create mode 100644 arch/mips/include/asm/mxu.h
>
> diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
> index e961c8a..c6270b0 100644
> --- a/arch/mips/include/asm/cpu-features.h
> +++ b/arch/mips/include/asm/cpu-features.h
> @@ -345,6 +345,10 @@
>  #define cpu_has_dsp3		(cpu_data[0].ases & MIPS_ASE_DSP3)
>  #endif
>
> +#ifndef cpu_has_mxu
> +#define cpu_has_mxu		(cpu_data[0].ases & MIPS_ASE_MXU)

This really ought to be defined as a constant 0 for non-Ingenic kernels, 
so that the compiler can discard the MXU code for other systems.

I'm not sure there's actually any point using one of the ASE flags for 
it, since we know it's always present on the Ingenic CPUs we support & 
always not present on non-Ingenic CPUs. That is, "#define cpu_has_mxu 
IS_ENABLED(CONFIG_MACH_INGENIC)" would work just as well & be compile 
time constant, allowing the compiler to optimise better.

> +#endif
> +
>  #ifndef cpu_has_mipsmt
>  #define cpu_has_mipsmt		(cpu_data[0].ases & MIPS_ASE_MIPSMT)
>  #endif
> diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
> index f672df8..77ab797 100644
> --- a/arch/mips/include/asm/cpu.h
> +++ b/arch/mips/include/asm/cpu.h
> @@ -428,5 +428,6 @@ enum cpu_type_enum {
>  #define MIPS_ASE_VZ		0x00000080 /* Virtualization ASE */
>  #define MIPS_ASE_MSA		0x00000100 /* MIPS SIMD Architecture */
>  #define MIPS_ASE_DSP3		0x00000200 /* Signal Processing ASE Rev 3*/
> +#define MIPS_ASE_MXU		0x00000400 /* Xburst MXU instruction set */
>
>  #endif /* _ASM_CPU_H */
> diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
> new file mode 100644
> index 0000000..cf77cbd
> --- /dev/null
> +++ b/arch/mips/include/asm/mxu.h
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright (C) Ingenic Semiconductor
> + * File taken from Ingenic Semiconductor's linux repo available in github
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + */
> +#ifndef _ASM_MXU_H
> +#define _ASM_MXU_H
> +
> +#include <asm/cpu.h>
> +#include <asm/cpu-features.h>
> +#include <asm/hazards.h>
> +#include <asm/mipsregs.h>
> +
> +static inline void __enable_mxu(void)
> +{
> +	unsigned int register val asm("t0");
> +	val = 3;

Why 3? Please document the magic number. Judging from below bit 0 is 
enable, but what's bit 1? If you #define names for the bits then use 
those that would be great.

> +	asm volatile(".word	0x7008042f\n\t"::"r"(val));
> +}
> +
> +static inline void enable_mxu(void)
> +{
> +	if (cpu_has_mxu)
> +		__enable_mxu();
> +}
> +
> +static inline void disable_mxu(void)
> +{
> +	unsigned int register val asm("t0");
> +	val = 0;
> +	asm volatile(".word	0x7008042f\n\t"::"r"(val));
> +	asm("nop\n\t");
> +	asm("nop\n\t");
> +	asm("nop\n\t");

Why the nops? Does something go wrong without them? Can you explain in a 
comment?

On Sat, 25 Jun 2016, PrasannaK
> +}
> +
> +static inline int mxu_used(void)
> +{
> +	unsigned int register reg_val asm("t0");
> +	asm volatile(".word	0x7008042f\n\t"::"r"(reg_val));

This doesn't seem right at all - the instruction you used is the same as 
in enable_mxu() & disable_mxu(), but in those you'd want to write to 
xr16 and here you'd want to read it.

As Maciej asked, if you documented the instruction encodings this would 
be easier to read. Even better if you were to #define some meaningful 
names then use the names rather than raw encodings it would make it 
clearer what's going on.

> +	return reg_val &0x01;

Also this magix 0x1 should be #define'd to something useful, perhaps 
something like "#define MXU_XR16_ENABLE BIT(0)".

> +}
> +
> +static inline void __save_mxu(void *tsk_void)
> +{
> +	struct task_struct *tsk = tsk_void;
> +
> +	register unsigned int reg_val asm("t0");
> +
> +	asm volatile(".word	0x7008042e\n\t");
> +	tsk->thread.mxu.xr[0] = reg_val;

Although it's likely to work, as far as I understand there's nothing 
preventing GCC from clobbering t0 between the asm statement & the write 
to the context struct.

To quote 
https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables

   "Defining a register variable does not reserve the register. Other 
than when invoking the Extended asm, the contents of the specified 
register are not guaranteed."

To avoid this I think it may be best to implement the save & restore 
routines in asm just like we already do for FP & MSA contexts.

> +	asm volatile(".word	0x7008006e\n\t");
> +	tsk->thread.mxu.xr[1] = reg_val;
> +	asm volatile(".word	0x700800ae\n\t");
> +	tsk->thread.mxu.xr[2] = reg_val;
> +	asm volatile(".word	0x700800ee\n\t");
> +	tsk->thread.mxu.xr[3] = reg_val;
> +	asm volatile(".word	0x7008012e\n\t");
> +	tsk->thread.mxu.xr[4] = reg_val;
> +	asm volatile(".word	0x7008016e\n\t");
> +	tsk->thread.mxu.xr[5] = reg_val;
> +	asm volatile(".word	0x700801ae\n\t");
> +	tsk->thread.mxu.xr[6] = reg_val;
> +	asm volatile(".word	0x700801ee\n\t");
> +	tsk->thread.mxu.xr[7] = reg_val;
> +	asm volatile(".word	0x7008022e\n\t");
> +	tsk->thread.mxu.xr[8] = reg_val;
> +	asm volatile(".word	0x7008026e\n\t");
> +	tsk->thread.mxu.xr[9] = reg_val;
> +	asm volatile(".word	0x700802ae\n\t");
> +	tsk->thread.mxu.xr[10] = reg_val;
> +	asm volatile(".word	0x700802ee\n\t");
> +	tsk->thread.mxu.xr[11] = reg_val;
> +	asm volatile(".word	0x7008032e\n\t");
> +	tsk->thread.mxu.xr[12] = reg_val;
> +	asm volatile(".word	0x7008036e\n\t");
> +	tsk->thread.mxu.xr[13] = reg_val;
> +	asm volatile(".word	0x700803ae\n\t");
> +	tsk->thread.mxu.xr[14] = reg_val;
> +	asm volatile(".word	0x700803ee\n\t");
> +	tsk->thread.mxu.xr[15] = reg_val;
> +}
> +
> +static inline void __restore_mxu(void *tsk_void)
> +{
> +	struct task_struct *tsk = tsk_void;
> +
> +	register unsigned int reg_val asm("t0");
> +
> +	reg_val = tsk->thread.mxu.xr[0];
> +	asm volatile(".word	0x7008042f\n\t"::"r"(reg_val));

Same comment as for saving context - I don't think this is guaranteed to 
work.

> +	reg_val = tsk->thread.mxu.xr[1];
> +	asm volatile(".word	0x7008006f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[2];
> +	asm volatile(".word	0x700800af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[3];
> +	asm volatile(".word	0x700800ef\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[4];
> +	asm volatile(".word	0x7008012f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[5];
> +	asm volatile(".word	0x7008016f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[6];
> +	asm volatile(".word	0x700801af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[7];
> +	asm volatile(".word	0x700801ef\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[8];
> +	asm volatile(".word	0x7008022f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[9];
> +	asm volatile(".word	0x7008026f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[10];
> +	asm volatile(".word	0x700802af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[11];
> +	asm volatile(".word	0x700802ef\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[12];
> +	asm volatile(".word	0x7008032f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[13];
> +	asm volatile(".word	0x7008036f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[14];
> +	asm volatile(".word	0x700803af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[15];
> +	asm volatile(".word	0x700803ef\n\t"::"r"(reg_val));
> +}
> +
> +#define save_mxu(tsk)						\
> +	do {							\
> +		if (cpu_has_mxu)				\
> +			__save_mxu(tsk);			\
> +	} while (0)
> +
> +#define restore_mxu(tsk)					\
> +	do {							\
> +		if (cpu_has_mxu)				\
> +			__restore_mxu(tsk);			\
> +	} while (0)
> +
> +#define __get_mxu_regs(tsk)					\
> +	({							\
> +		if (tsk == current)				\
> +			__save_mxu(current);			\
> +								\
> +		tsk->thread.mxu.xr;				\
> +	})
> +
> +#define __let_mxu_regs(tsk, regs)				\
> +	do {							\
> +		int i;						\
> +		for (i = 0; i < NUM_MXU_REGS; i++)		\
> +			tsk->thread.mxu.xr[i] = regs[i];	\
> +		if (tsk == current)				\
> +			__save_mxu(current);			\
> +	} while (0)
> +
> +#endif /* _ASM_MXU_H */
> diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
> index 7e78b62..a4def30 100644
> --- a/arch/mips/include/asm/processor.h
> +++ b/arch/mips/include/asm/processor.h
> @@ -142,6 +142,11 @@ struct mips_dsp_state {
>  	unsigned int	dspcontrol;
>  };
>
> +#define NUM_MXU_REGS 16
> +struct xburst_mxu_state {
> +	unsigned int xr[NUM_MXU_REGS];
> +};
> +
>  #define INIT_CPUMASK { \
>  	{0,} \
>  }
> @@ -266,6 +271,10 @@ struct thread_struct {
>  	/* Saved state of the DSP ASE, if available. */
>  	struct mips_dsp_state dsp;
>
> +	unsigned int mxu_used;

Why not a thread info flag (ie. 1 bit) rather than 4 bytes?

> +	/* Saved registers of Xburst MXU, if available. */
> +	struct xburst_mxu_state mxu;
> +
>  	/* Saved watch register state, if available. */
>  	union mips_watch_reg_state watch;
>
> @@ -330,6 +339,10 @@ struct thread_struct {
>  		.dspr		= {0, },			\
>  		.dspcontrol	= 0,				\
>  	},							\
> +	.mxu_used		= 0,				\
> +	.mxu			= {				\
> +		.xr		= {0, },			\
> +	},							\
>  	/*							\
>  	 * saved watch register stuff				\
>  	 */							\
> @@ -410,4 +423,10 @@ extern int mips_set_process_fp_mode(struct task_struct *task,
>  #define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
>  #define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
>
> +extern int mips_enable_mxu_other_cpus(void);
> +extern int mips_disable_mxu_other_cpus(void);
> +
> +#define ENABLE_MXU_OTHER_CPUS()         mips_enable_mxu_other_cpus()
> +#define DISABLE_MXU_OTHER_CPUS()        mips_disable_mxu_other_cpus()
> +
>  #endif /* _ASM_PROCESSOR_H */
> diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
> index ebb5c0f..112aff5 100644
> --- a/arch/mips/include/asm/switch_to.h
> +++ b/arch/mips/include/asm/switch_to.h
> @@ -75,6 +75,43 @@ do {	if (cpu_has_rw_llb) {						\
>  	}								\
>  } while (0)
>
> +static inline void handle_mxu_state(struct task_struct *prev,
> +		struct task_struct *next)
> +{
> +	struct task_struct *thread = NULL;
> +
> +	if (prev->thread.mxu_used) {
> +		if (mxu_used() == 1) {
> +			__save_mxu(prev);
> +		} else {
> +			prev->thread.mxu_used = 0;

This seems weird. If I understand correctly then if a thread uses MXU in 
one timeslice then runs some non-MXU code in its next timeslice its MXU 
context may be lost before any third timeslice. That sounds bad. Think 
about if you had a program that was in the middle of running some 
MXU-using algorithm when it happens to receive a signal, and the MXU 
code is interrupted by some non-MXU code for a while long enough for 
this path to be hit, then the signal handler returns to the MXU-using 
code & its context is corrupt. Or without signals the same thing could 
happen if you just happened to call some complex/long running non-MXU 
code in the middle of your MXU-using code. That sounds bad!

> +			thread = prev;
> +			rcu_read_lock();
> +			while_each_thread(prev, thread) {
> +				thread->thread.mxu_used = 0;
> +			};
> +			rcu_read_unlock();

I think this would need some commenting to explain what's going on & I'm 
not concinved it's correct, but also as mentioned earlier I'm not sure 
the kernel should be taking responsibility for synchronising MXU state 
across threads so this could possibly be removed.

> +		}
> +	} else {
> +		if (mxu_used() == 1) {
> +			__save_mxu(prev);
> +			prev->thread.mxu_used = 1;
> +			thread = prev;
> +			rcu_read_lock();
> +			while_each_thread(prev, thread) {
> +				thread->thread.mxu_used = 1;
> +			};
> +			rcu_read_unlock();

Likewise.

> +		}
> +	}
> +	disable_mxu();
> +
> +	if (next->thread.mxu_used) {
> +		__restore_mxu(next);
> +		enable_mxu();
> +	}
> +}
> +

Another issue would be - should MXU be usable in signal handlers? If so 
then it needs to have context saved & restored around signals, as part 
of an extended context structure much like that used for MSA. If not 
then we probably need to document that restriction somewhere and check 
that anyone interested is OK with it. Ultimately that probably depends 
upon the goals here - if MXU were ever to be used for auto-vectorisation 
for example then it should probably gain that sigcontext save/restore code.

Thanks,
     Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
@ 2016-07-05  9:34   ` Paul Burton
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Burton @ 2016-07-05  9:34 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan
  Cc: linux-mips, linux-kernel, adobriyan, john.stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	keescook, jslaby, akpm, macro, f.fainelli, mingo, alex.smith,
	markos.chandras, Leonid.Yegoshin, david.daney, zhaoxiu.zeng,
	chenhc, Zubair.Kakakhel, james.hogan, ralf

Hi PrasannaKumar,

On 25/06/16 13:14, PrasannaKumar Muralidharan wrote:
> From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
>
> This patch adds support for context switching Xburst MXU registers. The
> registers are named xr0 to xr16. xr16 is the control register that can
> be used to enable and disable MXU instruction set. Read and write to
> these registers can be done without enabling MXU instruction set by user
> space. Only when MXU instruction set is enabled any MXU instruction
> (other than read or write to xr registers) can be done. xr0 is always 0.

Do you have any examples of userland programs making use of MXU? They 
would be useful in allowing people to test this patch.

How have you tested this?

> Kernel does not know when MXU instruction is enabled or disabled. So
> during context switch if MXU is enabled in xr16 register then MXU
> registers are saved, restored when the task is run.

I'm not convinced this is the right way to go. It seems complex & 
fragile vs the alternatives, the simplest of which could be to just 
always save & restore MXU context in kernels with MXU support. Is there 
a significant performance cost to just unconditionally saving & 
restoring the MXU context? That is after all what Ingenic's vendor 
kernel, which it looks like large parts of your patch are taken from, does.

> When user space
> application enables MXU, it is not reflected in other threads
> immediately. So for convenience the applications can use prctl syscall
> to let the MXU state propagate across threads running in different CPUs.

Surely it wouldn't be reflected at all, since each thread has its own 
MXU context? Would you expect applications to actually want to enable 
MXU on one thread & make use of it from other already running threads? 
Off the top of my head I can't think of why that would be useful, so I'm 
wondering whether it would be better to just let each thread handle 
enabling MXU if it wants & leave the kernel out of it. If we just save & 
restore unconditionally then this becomes a non-issue anyway.

> Signed-off-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>optimise out
> ---
>  arch/mips/include/asm/cpu-features.h |   4 +
>  arch/mips/include/asm/cpu.h          |   1 +
>  arch/mips/include/asm/mxu.h          | 157 +++++++++++++++++++++++++++++++++++
>  arch/mips/include/asm/processor.h    |  19 +++++
>  arch/mips/include/asm/switch_to.h    |  40 +++++++++
>  arch/mips/kernel/cpu-probe.c         |   1 +
>  arch/mips/kernel/process.c           |  48 +++++++++++
>  include/uapi/linux/prctl.h           |   3 +
>  kernel/sys.c                         |   6 ++
>  9 files changed, 279 insertions(+)
>  create mode 100644 arch/mips/include/asm/mxu.h
>
> diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
> index e961c8a..c6270b0 100644
> --- a/arch/mips/include/asm/cpu-features.h
> +++ b/arch/mips/include/asm/cpu-features.h
> @@ -345,6 +345,10 @@
>  #define cpu_has_dsp3		(cpu_data[0].ases & MIPS_ASE_DSP3)
>  #endif
>
> +#ifndef cpu_has_mxu
> +#define cpu_has_mxu		(cpu_data[0].ases & MIPS_ASE_MXU)

This really ought to be defined as a constant 0 for non-Ingenic kernels, 
so that the compiler can discard the MXU code for other systems.

I'm not sure there's actually any point using one of the ASE flags for 
it, since we know it's always present on the Ingenic CPUs we support & 
always not present on non-Ingenic CPUs. That is, "#define cpu_has_mxu 
IS_ENABLED(CONFIG_MACH_INGENIC)" would work just as well & be compile 
time constant, allowing the compiler to optimise better.

> +#endif
> +
>  #ifndef cpu_has_mipsmt
>  #define cpu_has_mipsmt		(cpu_data[0].ases & MIPS_ASE_MIPSMT)
>  #endif
> diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
> index f672df8..77ab797 100644
> --- a/arch/mips/include/asm/cpu.h
> +++ b/arch/mips/include/asm/cpu.h
> @@ -428,5 +428,6 @@ enum cpu_type_enum {
>  #define MIPS_ASE_VZ		0x00000080 /* Virtualization ASE */
>  #define MIPS_ASE_MSA		0x00000100 /* MIPS SIMD Architecture */
>  #define MIPS_ASE_DSP3		0x00000200 /* Signal Processing ASE Rev 3*/
> +#define MIPS_ASE_MXU		0x00000400 /* Xburst MXU instruction set */
>
>  #endif /* _ASM_CPU_H */
> diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
> new file mode 100644
> index 0000000..cf77cbd
> --- /dev/null
> +++ b/arch/mips/include/asm/mxu.h
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright (C) Ingenic Semiconductor
> + * File taken from Ingenic Semiconductor's linux repo available in github
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + */
> +#ifndef _ASM_MXU_H
> +#define _ASM_MXU_H
> +
> +#include <asm/cpu.h>
> +#include <asm/cpu-features.h>
> +#include <asm/hazards.h>
> +#include <asm/mipsregs.h>
> +
> +static inline void __enable_mxu(void)
> +{
> +	unsigned int register val asm("t0");
> +	val = 3;

Why 3? Please document the magic number. Judging from below bit 0 is 
enable, but what's bit 1? If you #define names for the bits then use 
those that would be great.

> +	asm volatile(".word	0x7008042f\n\t"::"r"(val));
> +}
> +
> +static inline void enable_mxu(void)
> +{
> +	if (cpu_has_mxu)
> +		__enable_mxu();
> +}
> +
> +static inline void disable_mxu(void)
> +{
> +	unsigned int register val asm("t0");
> +	val = 0;
> +	asm volatile(".word	0x7008042f\n\t"::"r"(val));
> +	asm("nop\n\t");
> +	asm("nop\n\t");
> +	asm("nop\n\t");

Why the nops? Does something go wrong without them? Can you explain in a 
comment?

On Sat, 25 Jun 2016, PrasannaK
> +}
> +
> +static inline int mxu_used(void)
> +{
> +	unsigned int register reg_val asm("t0");
> +	asm volatile(".word	0x7008042f\n\t"::"r"(reg_val));

This doesn't seem right at all - the instruction you used is the same as 
in enable_mxu() & disable_mxu(), but in those you'd want to write to 
xr16 and here you'd want to read it.

As Maciej asked, if you documented the instruction encodings this would 
be easier to read. Even better if you were to #define some meaningful 
names then use the names rather than raw encodings it would make it 
clearer what's going on.

> +	return reg_val &0x01;

Also this magix 0x1 should be #define'd to something useful, perhaps 
something like "#define MXU_XR16_ENABLE BIT(0)".

> +}
> +
> +static inline void __save_mxu(void *tsk_void)
> +{
> +	struct task_struct *tsk = tsk_void;
> +
> +	register unsigned int reg_val asm("t0");
> +
> +	asm volatile(".word	0x7008042e\n\t");
> +	tsk->thread.mxu.xr[0] = reg_val;

Although it's likely to work, as far as I understand there's nothing 
preventing GCC from clobbering t0 between the asm statement & the write 
to the context struct.

To quote 
https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables

   "Defining a register variable does not reserve the register. Other 
than when invoking the Extended asm, the contents of the specified 
register are not guaranteed."

To avoid this I think it may be best to implement the save & restore 
routines in asm just like we already do for FP & MSA contexts.

> +	asm volatile(".word	0x7008006e\n\t");
> +	tsk->thread.mxu.xr[1] = reg_val;
> +	asm volatile(".word	0x700800ae\n\t");
> +	tsk->thread.mxu.xr[2] = reg_val;
> +	asm volatile(".word	0x700800ee\n\t");
> +	tsk->thread.mxu.xr[3] = reg_val;
> +	asm volatile(".word	0x7008012e\n\t");
> +	tsk->thread.mxu.xr[4] = reg_val;
> +	asm volatile(".word	0x7008016e\n\t");
> +	tsk->thread.mxu.xr[5] = reg_val;
> +	asm volatile(".word	0x700801ae\n\t");
> +	tsk->thread.mxu.xr[6] = reg_val;
> +	asm volatile(".word	0x700801ee\n\t");
> +	tsk->thread.mxu.xr[7] = reg_val;
> +	asm volatile(".word	0x7008022e\n\t");
> +	tsk->thread.mxu.xr[8] = reg_val;
> +	asm volatile(".word	0x7008026e\n\t");
> +	tsk->thread.mxu.xr[9] = reg_val;
> +	asm volatile(".word	0x700802ae\n\t");
> +	tsk->thread.mxu.xr[10] = reg_val;
> +	asm volatile(".word	0x700802ee\n\t");
> +	tsk->thread.mxu.xr[11] = reg_val;
> +	asm volatile(".word	0x7008032e\n\t");
> +	tsk->thread.mxu.xr[12] = reg_val;
> +	asm volatile(".word	0x7008036e\n\t");
> +	tsk->thread.mxu.xr[13] = reg_val;
> +	asm volatile(".word	0x700803ae\n\t");
> +	tsk->thread.mxu.xr[14] = reg_val;
> +	asm volatile(".word	0x700803ee\n\t");
> +	tsk->thread.mxu.xr[15] = reg_val;
> +}
> +
> +static inline void __restore_mxu(void *tsk_void)
> +{
> +	struct task_struct *tsk = tsk_void;
> +
> +	register unsigned int reg_val asm("t0");
> +
> +	reg_val = tsk->thread.mxu.xr[0];
> +	asm volatile(".word	0x7008042f\n\t"::"r"(reg_val));

Same comment as for saving context - I don't think this is guaranteed to 
work.

> +	reg_val = tsk->thread.mxu.xr[1];
> +	asm volatile(".word	0x7008006f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[2];
> +	asm volatile(".word	0x700800af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[3];
> +	asm volatile(".word	0x700800ef\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[4];
> +	asm volatile(".word	0x7008012f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[5];
> +	asm volatile(".word	0x7008016f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[6];
> +	asm volatile(".word	0x700801af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[7];
> +	asm volatile(".word	0x700801ef\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[8];
> +	asm volatile(".word	0x7008022f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[9];
> +	asm volatile(".word	0x7008026f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[10];
> +	asm volatile(".word	0x700802af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[11];
> +	asm volatile(".word	0x700802ef\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[12];
> +	asm volatile(".word	0x7008032f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[13];
> +	asm volatile(".word	0x7008036f\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[14];
> +	asm volatile(".word	0x700803af\n\t"::"r"(reg_val));
> +	reg_val = tsk->thread.mxu.xr[15];
> +	asm volatile(".word	0x700803ef\n\t"::"r"(reg_val));
> +}
> +
> +#define save_mxu(tsk)						\
> +	do {							\
> +		if (cpu_has_mxu)				\
> +			__save_mxu(tsk);			\
> +	} while (0)
> +
> +#define restore_mxu(tsk)					\
> +	do {							\
> +		if (cpu_has_mxu)				\
> +			__restore_mxu(tsk);			\
> +	} while (0)
> +
> +#define __get_mxu_regs(tsk)					\
> +	({							\
> +		if (tsk == current)				\
> +			__save_mxu(current);			\
> +								\
> +		tsk->thread.mxu.xr;				\
> +	})
> +
> +#define __let_mxu_regs(tsk, regs)				\
> +	do {							\
> +		int i;						\
> +		for (i = 0; i < NUM_MXU_REGS; i++)		\
> +			tsk->thread.mxu.xr[i] = regs[i];	\
> +		if (tsk == current)				\
> +			__save_mxu(current);			\
> +	} while (0)
> +
> +#endif /* _ASM_MXU_H */
> diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
> index 7e78b62..a4def30 100644
> --- a/arch/mips/include/asm/processor.h
> +++ b/arch/mips/include/asm/processor.h
> @@ -142,6 +142,11 @@ struct mips_dsp_state {
>  	unsigned int	dspcontrol;
>  };
>
> +#define NUM_MXU_REGS 16
> +struct xburst_mxu_state {
> +	unsigned int xr[NUM_MXU_REGS];
> +};
> +
>  #define INIT_CPUMASK { \
>  	{0,} \
>  }
> @@ -266,6 +271,10 @@ struct thread_struct {
>  	/* Saved state of the DSP ASE, if available. */
>  	struct mips_dsp_state dsp;
>
> +	unsigned int mxu_used;

Why not a thread info flag (ie. 1 bit) rather than 4 bytes?

> +	/* Saved registers of Xburst MXU, if available. */
> +	struct xburst_mxu_state mxu;
> +
>  	/* Saved watch register state, if available. */
>  	union mips_watch_reg_state watch;
>
> @@ -330,6 +339,10 @@ struct thread_struct {
>  		.dspr		= {0, },			\
>  		.dspcontrol	= 0,				\
>  	},							\
> +	.mxu_used		= 0,				\
> +	.mxu			= {				\
> +		.xr		= {0, },			\
> +	},							\
>  	/*							\
>  	 * saved watch register stuff				\
>  	 */							\
> @@ -410,4 +423,10 @@ extern int mips_set_process_fp_mode(struct task_struct *task,
>  #define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
>  #define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
>
> +extern int mips_enable_mxu_other_cpus(void);
> +extern int mips_disable_mxu_other_cpus(void);
> +
> +#define ENABLE_MXU_OTHER_CPUS()         mips_enable_mxu_other_cpus()
> +#define DISABLE_MXU_OTHER_CPUS()        mips_disable_mxu_other_cpus()
> +
>  #endif /* _ASM_PROCESSOR_H */
> diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
> index ebb5c0f..112aff5 100644
> --- a/arch/mips/include/asm/switch_to.h
> +++ b/arch/mips/include/asm/switch_to.h
> @@ -75,6 +75,43 @@ do {	if (cpu_has_rw_llb) {						\
>  	}								\
>  } while (0)
>
> +static inline void handle_mxu_state(struct task_struct *prev,
> +		struct task_struct *next)
> +{
> +	struct task_struct *thread = NULL;
> +
> +	if (prev->thread.mxu_used) {
> +		if (mxu_used() == 1) {
> +			__save_mxu(prev);
> +		} else {
> +			prev->thread.mxu_used = 0;

This seems weird. If I understand correctly then if a thread uses MXU in 
one timeslice then runs some non-MXU code in its next timeslice its MXU 
context may be lost before any third timeslice. That sounds bad. Think 
about if you had a program that was in the middle of running some 
MXU-using algorithm when it happens to receive a signal, and the MXU 
code is interrupted by some non-MXU code for a while long enough for 
this path to be hit, then the signal handler returns to the MXU-using 
code & its context is corrupt. Or without signals the same thing could 
happen if you just happened to call some complex/long running non-MXU 
code in the middle of your MXU-using code. That sounds bad!

> +			thread = prev;
> +			rcu_read_lock();
> +			while_each_thread(prev, thread) {
> +				thread->thread.mxu_used = 0;
> +			};
> +			rcu_read_unlock();

I think this would need some commenting to explain what's going on & I'm 
not concinved it's correct, but also as mentioned earlier I'm not sure 
the kernel should be taking responsibility for synchronising MXU state 
across threads so this could possibly be removed.

> +		}
> +	} else {
> +		if (mxu_used() == 1) {
> +			__save_mxu(prev);
> +			prev->thread.mxu_used = 1;
> +			thread = prev;
> +			rcu_read_lock();
> +			while_each_thread(prev, thread) {
> +				thread->thread.mxu_used = 1;
> +			};
> +			rcu_read_unlock();

Likewise.

> +		}
> +	}
> +	disable_mxu();
> +
> +	if (next->thread.mxu_used) {
> +		__restore_mxu(next);
> +		enable_mxu();
> +	}
> +}
> +

Another issue would be - should MXU be usable in signal handlers? If so 
then it needs to have context saved & restored around signals, as part 
of an extended context structure much like that used for MSA. If not 
then we probably need to document that restriction somewhere and check 
that anyone interested is OK with it. Ultimately that probably depends 
upon the goals here - if MXU were ever to be used for auto-vectorisation 
for example then it should probably gain that sigcontext save/restore code.

Thanks,
     Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
  2016-07-04 21:45 ` Florian Fainelli
@ 2016-07-05  9:47   ` PrasannaKumar Muralidharan
  0 siblings, 0 replies; 13+ messages in thread
From: PrasannaKumar Muralidharan @ 2016-07-05  9:47 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: linux-mips, linux-kernel, Alexey Dobriyan, John Stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	Kees Cook, jslaby, akpm, macro, mingo, alex.smith,
	markos.chandras, Leonid.Yegoshin, david.daney, zhaoxiu.zeng,
	chenhc, Zubair.Kakakhel, james.hogan, Paul Burton, ralf

> This is all well and good and seems useful, but you have not stated why
> this is even useful in the first place?

Sorry, should have done that already. MXU is the name for SIMD
instructions found in Ingenic SoC. Registers xr0 to xr16 is used by
MXU instructions. I will add info when I submit next version of the
patch.

>> +     unsigned int mxu_used;
>> +     /* Saved registers of Xburst MXU, if available. */
>> +     struct xburst_mxu_state mxu;
>
> That's adding about 17 * 4 bytes to a structure that is probably best
> kept as small as possible, might be worth adding an ifdef here specific
> to the Ingenic platforms w/ MXU?

Yes, makes sense. Will do.

>> @@ -410,4 +423,10 @@ extern int mips_set_process_fp_mode(struct task_struct *task,
>>  #define GET_FP_MODE(task)            mips_get_process_fp_mode(task)
>>  #define SET_FP_MODE(task,value)              mips_set_process_fp_mode(task, value)
>>
>> +extern int mips_enable_mxu_other_cpus(void);
>> +extern int mips_disable_mxu_other_cpus(void);
>> +
>> +#define ENABLE_MXU_OTHER_CPUS()         mips_enable_mxu_other_cpus()
>> +#define DISABLE_MXU_OTHER_CPUS()        mips_disable_mxu_other_cpus()
>
> Where is the stub when building for !MIPS? Have you build a kernel with
> your changes for e.g: x86?

I have missed it. Will add it for sure. Did not check for other arch,
will do it before submitting next revision.

>> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
>> index a8d0759..b193d91 100644
>> --- a/include/uapi/linux/prctl.h
>> +++ b/include/uapi/linux/prctl.h
>> @@ -197,4 +197,7 @@ struct prctl_mm_map {
>>  # define PR_CAP_AMBIENT_LOWER                3
>>  # define PR_CAP_AMBIENT_CLEAR_ALL    4
>>
>> +#define PR_ENABLE_MXU_OTHER_CPUS        48
>> +#define PR_DISABLE_MXU_OTHER_CPUS       49
>> +
>>  #endif /* _LINUX_PRCTL_H */
>> diff --git a/kernel/sys.c b/kernel/sys.c
>> index 89d5be4..fbbc7b2 100644
>> --- a/kernel/sys.c
>> +++ b/kernel/sys.c
>> @@ -2270,6 +2270,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>>       case PR_GET_FP_MODE:
>>               error = GET_FP_MODE(me);
>>               break;
>> +     case PR_ENABLE_MXU_OTHER_CPUS:
>> +             error = ENABLE_MXU_OTHER_CPUS();
>> +             break;
>> +     case PR_DISABLE_MXU_OTHER_CPUS:
>> +             error = DISABLE_MXU_OTHER_CPUS();
>> +             break;
>
> Is not there a way to call into an architecture specific prctl() for the
> unhandled options passed to prctl()? Not everybody will
> want/implement/support that, and, more importantly changing generic code
> here looks fishy and probably the wrong abstraction.

PR_GET_FP_MODE is specific to MIPS and is present in arch independent
code. I followed similar structure. If it makes sense to put into arch
specific prctl() I will do that.

Thanks for your review. Really appreciate it.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
  2016-07-04 22:30   ` Maciej W. Rozycki
  (?)
@ 2016-07-05 10:07   ` PrasannaKumar Muralidharan
  2016-07-05 10:53       ` Maciej W. Rozycki
  -1 siblings, 1 reply; 13+ messages in thread
From: PrasannaKumar Muralidharan @ 2016-07-05 10:07 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: linux-mips, linux-kernel, Alexey Dobriyan, John Stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	Kees Cook, jslaby, Andrew Morton, Florian Fainelli, mingo,
	alex.smith, markos.chandras, Leonid Yegoshin, david.daney,
	zhaoxiu.zeng, chenhc, Zubair.Kakakhel, James Hogan, Paul Burton,
	Ralf Baechle

On 5 July 2016 at 04:00, Maciej W. Rozycki <macro@imgtec.com> wrote:
> On Sat, 25 Jun 2016, PrasannaKumar Muralidharan wrote:
>
>> diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
>> new file mode 100644
>> index 0000000..cf77cbd
>> --- /dev/null
>> +++ b/arch/mips/include/asm/mxu.h
>> @@ -0,0 +1,157 @@
>> +/*
>> + * Copyright (C) Ingenic Semiconductor
>> + * File taken from Ingenic Semiconductor's linux repo available in github
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License as published by the
>> + * Free Software Foundation;  either version 2 of the  License, or (at your
>> + * option) any later version.
>> + */
>> +#ifndef _ASM_MXU_H
>> +#define _ASM_MXU_H
>> +
>> +#include <asm/cpu.h>
>> +#include <asm/cpu-features.h>
>> +#include <asm/hazards.h>
>> +#include <asm/mipsregs.h>
>> +
>> +static inline void __enable_mxu(void)
>> +{
>> +     unsigned int register val asm("t0");
>> +     val = 3;
>> +     asm volatile(".word     0x7008042f\n\t"::"r"(val));
>
>  Can you please document your manually generated machine code, i.e. what
> instruction 0x7008042f actually is?

I have taken this header from vendor kernel. This instruction saves 3
to xr16 register. I will document them in the next revision.

>  Also our convention has been to separate asm operands with spaces, and
> there's no need for a new line or a tab character at the end of an
> inline as GCC will add these automatically as needed, i.e.:
>
>         asm volatile(".word     0x7008042f" : : "r" (val));
>
> Likewise throughout.

Will follow the convention.

>> +static inline void __save_mxu(void *tsk_void)
>> +{
>> +     struct task_struct *tsk = tsk_void;
>> +
>> +     register unsigned int reg_val asm("t0");
>> +
>> +     asm volatile(".word     0x7008042e\n\t");
>> +     tsk->thread.mxu.xr[0] = reg_val;
>> +     asm volatile(".word     0x7008006e\n\t");
>> +     tsk->thread.mxu.xr[1] = reg_val;
>> +     asm volatile(".word     0x700800ae\n\t");
>> +     tsk->thread.mxu.xr[2] = reg_val;
>> +     asm volatile(".word     0x700800ee\n\t");
>> +     tsk->thread.mxu.xr[3] = reg_val;
>> +     asm volatile(".word     0x7008012e\n\t");
>> +     tsk->thread.mxu.xr[4] = reg_val;
>> +     asm volatile(".word     0x7008016e\n\t");
>> +     tsk->thread.mxu.xr[5] = reg_val;
>> +     asm volatile(".word     0x700801ae\n\t");
>> +     tsk->thread.mxu.xr[6] = reg_val;
>> +     asm volatile(".word     0x700801ee\n\t");
>> +     tsk->thread.mxu.xr[7] = reg_val;
>> +     asm volatile(".word     0x7008022e\n\t");
>> +     tsk->thread.mxu.xr[8] = reg_val;
>> +     asm volatile(".word     0x7008026e\n\t");
>> +     tsk->thread.mxu.xr[9] = reg_val;
>> +     asm volatile(".word     0x700802ae\n\t");
>> +     tsk->thread.mxu.xr[10] = reg_val;
>> +     asm volatile(".word     0x700802ee\n\t");
>> +     tsk->thread.mxu.xr[11] = reg_val;
>> +     asm volatile(".word     0x7008032e\n\t");
>> +     tsk->thread.mxu.xr[12] = reg_val;
>> +     asm volatile(".word     0x7008036e\n\t");
>> +     tsk->thread.mxu.xr[13] = reg_val;
>> +     asm volatile(".word     0x700803ae\n\t");
>> +     tsk->thread.mxu.xr[14] = reg_val;
>> +     asm volatile(".word     0x700803ee\n\t");
>> +     tsk->thread.mxu.xr[15] = reg_val;
>> +}
>
>  Not using an output operand with asms here?

I think the instruction saves the xr* register value to reg_val
without need for output operand.

Thanks for your review, appreciate it.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
@ 2016-07-05 10:53       ` Maciej W. Rozycki
  0 siblings, 0 replies; 13+ messages in thread
From: Maciej W. Rozycki @ 2016-07-05 10:53 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan
  Cc: linux-mips, linux-kernel, Alexey Dobriyan, John Stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	Kees Cook, jslaby, Andrew Morton, Florian Fainelli, mingo,
	alex.smith, markos.chandras, Leonid Yegoshin, david.daney,
	zhaoxiu.zeng, chenhc, Zubair.Kakakhel, James Hogan, Paul Burton,
	Ralf Baechle

On Tue, 5 Jul 2016, PrasannaKumar Muralidharan wrote:

> >> +     asm volatile(".word     0x700803ee\n\t");
> >> +     tsk->thread.mxu.xr[15] = reg_val;
> >> +}
> >
> >  Not using an output operand with asms here?
> 
> I think the instruction saves the xr* register value to reg_val
> without need for output operand.

 It does, a classic `asm' is supposed to act as an optimisation barrier.  

 However since you've used an operand `asm' in `__restore_mxu' I think 
it would be good for consistency to use one here as well; it would make 
this piece of code a little bit more readable too, as you wouldn't have 
to guess that the `asm' writes to `reg_val' then.

 So I suggest that you convert `__save_mxu' to use an operand `asm' as 
well.

  Maciej

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
@ 2016-07-05 10:53       ` Maciej W. Rozycki
  0 siblings, 0 replies; 13+ messages in thread
From: Maciej W. Rozycki @ 2016-07-05 10:53 UTC (permalink / raw)
  To: PrasannaKumar Muralidharan
  Cc: linux-mips, linux-kernel, Alexey Dobriyan, John Stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	Kees Cook, jslaby, Andrew Morton, Florian Fainelli, mingo,
	alex.smith, markos.chandras, Leonid Yegoshin, david.daney,
	zhaoxiu.zeng, chenhc, Zubair.Kakakhel, James Hogan, Paul Burton,
	Ralf Baechle

On Tue, 5 Jul 2016, PrasannaKumar Muralidharan wrote:

> >> +     asm volatile(".word     0x700803ee\n\t");
> >> +     tsk->thread.mxu.xr[15] = reg_val;
> >> +}
> >
> >  Not using an output operand with asms here?
> 
> I think the instruction saves the xr* register value to reg_val
> without need for output operand.

 It does, a classic `asm' is supposed to act as an optimisation barrier.  

 However since you've used an operand `asm' in `__restore_mxu' I think 
it would be good for consistency to use one here as well; it would make 
this piece of code a little bit more readable too, as you wouldn't have 
to guess that the `asm' writes to `reg_val' then.

 So I suggest that you convert `__save_mxu' to use an operand `asm' as 
well.

  Maciej

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
  2016-07-05  9:34   ` Paul Burton
  (?)
@ 2016-07-05 15:21   ` PrasannaKumar Muralidharan
  -1 siblings, 0 replies; 13+ messages in thread
From: PrasannaKumar Muralidharan @ 2016-07-05 15:21 UTC (permalink / raw)
  To: Paul Burton
  Cc: linux-mips, linux-kernel, Alexey Dobriyan, John Stultz, mguzik,
	athorlton, mhocko, ebiederm, gorcunov, luto, cl, serge.hallyn,
	Kees Cook, jslaby, Andrew Morton, Maciej W. Rozycki,
	Florian Fainelli, mingo, alex.smith, markos.chandras,
	Leonid Yegoshin, david.daney, zhaoxiu.zeng, chenhc,
	Zubair.Kakakhel, James Hogan, Ralf Baechle

On 5 July 2016 at 15:04, Paul Burton <paul.burton@imgtec.com> wrote:
> Hi PrasannaKumar,
>
> On 25/06/16 13:14, PrasannaKumar Muralidharan wrote:
>>
>> From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
>>
>> This patch adds support for context switching Xburst MXU registers. The
>> registers are named xr0 to xr16. xr16 is the control register that can
>> be used to enable and disable MXU instruction set. Read and write to
>> these registers can be done without enabling MXU instruction set by user
>> space. Only when MXU instruction set is enabled any MXU instruction
>> (other than read or write to xr registers) can be done. xr0 is always 0.
>
>
> Do you have any examples of userland programs making use of MXU? They would
> be useful in allowing people to test this patch.

There is a modified mplayer from ingenic but it assumes MXU is enabled
by default.

> How have you tested this?

I am using a very simple program to test. Will post it somewhere
online and share the link.

>> Kernel does not know when MXU instruction is enabled or disabled. So
>> during context switch if MXU is enabled in xr16 register then MXU
>> registers are saved, restored when the task is run.
>
>
> I'm not convinced this is the right way to go. It seems complex & fragile vs
> the alternatives, the simplest of which could be to just always save &
> restore MXU context in kernels with MXU support. Is there a significant
> performance cost to just unconditionally saving & restoring the MXU context?
> That is after all what Ingenic's vendor kernel, which it looks like large
> parts of your patch are taken from, does.

I did not test the impact of save & restore MXU context on every task.
Any pointers to do it will help.

>> When user space
>> application enables MXU, it is not reflected in other threads
>> immediately. So for convenience the applications can use prctl syscall
>> to let the MXU state propagate across threads running in different CPUs.
>
>
> Surely it wouldn't be reflected at all, since each thread has its own MXU
> context? Would you expect applications to actually want to enable MXU on one
> thread & make use of it from other already running threads? Off the top of
> my head I can't think of why that would be useful, so I'm wondering whether
> it would be better to just let each thread handle enabling MXU if it wants &
> leave the kernel out of it. If we just save & restore unconditionally then
> this becomes a non-issue anyway.

Consider a case where a class's constructor enables MXU when first
instance of it is created, disables MXU when the last instance of it
is destructed and Garbage collector (running in its own thread) is
responsible for cleanup. This is not an existing use case. If this use
case seems unlikely then I will make MXU save and restore per task.

>> Signed-off-by: PrasannaKumar Muralidharan
>> <prasannatsmkumar@gmail.com>optimise out
>> ---
>>  arch/mips/include/asm/cpu-features.h |   4 +
>>  arch/mips/include/asm/cpu.h          |   1 +
>>  arch/mips/include/asm/mxu.h          | 157
>> +++++++++++++++++++++++++++++++++++
>>  arch/mips/include/asm/processor.h    |  19 +++++
>>  arch/mips/include/asm/switch_to.h    |  40 +++++++++
>>  arch/mips/kernel/cpu-probe.c         |   1 +
>>  arch/mips/kernel/process.c           |  48 +++++++++++
>>  include/uapi/linux/prctl.h           |   3 +
>>  kernel/sys.c                         |   6 ++
>>  9 files changed, 279 insertions(+)
>>  create mode 100644 arch/mips/include/asm/mxu.h
>>
>> diff --git a/arch/mips/include/asm/cpu-features.h
>> b/arch/mips/include/asm/cpu-features.h
>> index e961c8a..c6270b0 100644
>> --- a/arch/mips/include/asm/cpu-features.h
>> +++ b/arch/mips/include/asm/cpu-features.h
>> @@ -345,6 +345,10 @@
>>  #define cpu_has_dsp3           (cpu_data[0].ases & MIPS_ASE_DSP3)
>>  #endif
>>
>> +#ifndef cpu_has_mxu
>> +#define cpu_has_mxu            (cpu_data[0].ases & MIPS_ASE_MXU)
>
>
> This really ought to be defined as a constant 0 for non-Ingenic kernels, so
> that the compiler can discard the MXU code for other systems.
>
> I'm not sure there's actually any point using one of the ASE flags for it,
> since we know it's always present on the Ingenic CPUs we support & always
> not present on non-Ingenic CPUs. That is, "#define cpu_has_mxu
> IS_ENABLED(CONFIG_MACH_INGENIC)" would work just as well & be compile time
> constant, allowing the compiler to optimise better.

Completely agree. Will make necessary change in the next version.

>> +#endif
>> +
>>  #ifndef cpu_has_mipsmt
>>  #define cpu_has_mipsmt         (cpu_data[0].ases & MIPS_ASE_MIPSMT)
>>  #endif
>> diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
>> index f672df8..77ab797 100644
>> --- a/arch/mips/include/asm/cpu.h
>> +++ b/arch/mips/include/asm/cpu.h
>> @@ -428,5 +428,6 @@ enum cpu_type_enum {
>>  #define MIPS_ASE_VZ            0x00000080 /* Virtualization ASE */
>>  #define MIPS_ASE_MSA           0x00000100 /* MIPS SIMD Architecture */
>>  #define MIPS_ASE_DSP3          0x00000200 /* Signal Processing ASE Rev
>> 3*/
>> +#define MIPS_ASE_MXU           0x00000400 /* Xburst MXU instruction set
>> */
>>
>>  #endif /* _ASM_CPU_H */
>> diff --git a/arch/mips/include/asm/mxu.h b/arch/mips/include/asm/mxu.h
>> new file mode 100644
>> index 0000000..cf77cbd
>> --- /dev/null
>> +++ b/arch/mips/include/asm/mxu.h
>> @@ -0,0 +1,157 @@
>> +/*
>> + * Copyright (C) Ingenic Semiconductor
>> + * File taken from Ingenic Semiconductor's linux repo available in github
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> it
>> + * under the terms of the GNU General Public License as published by the
>> + * Free Software Foundation;  either version 2 of the  License, or (at
>> your
>> + * option) any later version.
>> + */
>> +#ifndef _ASM_MXU_H
>> +#define _ASM_MXU_H
>> +
>> +#include <asm/cpu.h>
>> +#include <asm/cpu-features.h>
>> +#include <asm/hazards.h>
>> +#include <asm/mipsregs.h>
>> +
>> +static inline void __enable_mxu(void)
>> +{
>> +       unsigned int register val asm("t0");
>> +       val = 3;
>
>
> Why 3? Please document the magic number. Judging from below bit 0 is enable,
> but what's bit 1? If you #define names for the bits then use those that
> would be great.

Will do document these magic numbers.

>> +       asm volatile(".word     0x7008042f\n\t"::"r"(val));
>> +}
>> +
>> +static inline void enable_mxu(void)
>> +{
>> +       if (cpu_has_mxu)
>> +               __enable_mxu();
>> +}
>> +
>> +static inline void disable_mxu(void)
>> +{
>> +       unsigned int register val asm("t0");
>> +       val = 0;
>> +       asm volatile(".word     0x7008042f\n\t"::"r"(val));
>> +       asm("nop\n\t");
>> +       asm("nop\n\t");
>> +       asm("nop\n\t");
>
>
> Why the nops? Does something go wrong without them? Can you explain in a
> comment?
>
> On Sat, 25 Jun 2016, PrasannaK
>>
>> +}
>> +
>> +static inline int mxu_used(void)
>> +{
>> +       unsigned int register reg_val asm("t0");
>> +       asm volatile(".word     0x7008042f\n\t"::"r"(reg_val));
>
>
> This doesn't seem right at all - the instruction you used is the same as in
> enable_mxu() & disable_mxu(), but in those you'd want to write to xr16 and
> here you'd want to read it.

I have messed up while porting my changes from 3.0 kernel to
linux-next. disable_mxu and enable_mxu have different instruction, nop
should come inside enable_mxu.

I could not find a make linux-next run with my existing user land in
any of the boards I have, so I have to use older kernel for testing. I
have clearly overlooked this, a big mistake from my side, I sincerely
apologise for this.

> As Maciej asked, if you documented the instruction encodings this would be
> easier to read. Even better if you were to #define some meaningful names
> then use the names rather than raw encodings it would make it clearer what's
> going on.
>
>> +       return reg_val &0x01;
>
>
> Also this magix 0x1 should be #define'd to something useful, perhaps
> something like "#define MXU_XR16_ENABLE BIT(0)".

Sure will do in the next version.

>> +}
>> +
>> +static inline void __save_mxu(void *tsk_void)
>> +{
>> +       struct task_struct *tsk = tsk_void;
>> +
>> +       register unsigned int reg_val asm("t0");
>> +
>> +       asm volatile(".word     0x7008042e\n\t");
>> +       tsk->thread.mxu.xr[0] = reg_val;
>
>
> Although it's likely to work, as far as I understand there's nothing
> preventing GCC from clobbering t0 between the asm statement & the write to
> the context struct.
>
> To quote
> https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
>
>   "Defining a register variable does not reserve the register. Other than
> when invoking the Extended asm, the contents of the specified register are
> not guaranteed."
>
> To avoid this I think it may be best to implement the save & restore
> routines in asm just like we already do for FP & MSA contexts.
>
>
>> +       asm volatile(".word     0x7008006e\n\t");
>> +       tsk->thread.mxu.xr[1] = reg_val;
>> +       asm volatile(".word     0x700800ae\n\t");
>> +       tsk->thread.mxu.xr[2] = reg_val;
>> +       asm volatile(".word     0x700800ee\n\t");
>> +       tsk->thread.mxu.xr[3] = reg_val;
>> +       asm volatile(".word     0x7008012e\n\t");
>> +       tsk->thread.mxu.xr[4] = reg_val;
>> +       asm volatile(".word     0x7008016e\n\t");
>> +       tsk->thread.mxu.xr[5] = reg_val;
>> +       asm volatile(".word     0x700801ae\n\t");
>> +       tsk->thread.mxu.xr[6] = reg_val;
>> +       asm volatile(".word     0x700801ee\n\t");
>> +       tsk->thread.mxu.xr[7] = reg_val;
>> +       asm volatile(".word     0x7008022e\n\t");
>> +       tsk->thread.mxu.xr[8] = reg_val;
>> +       asm volatile(".word     0x7008026e\n\t");
>> +       tsk->thread.mxu.xr[9] = reg_val;
>> +       asm volatile(".word     0x700802ae\n\t");
>> +       tsk->thread.mxu.xr[10] = reg_val;
>> +       asm volatile(".word     0x700802ee\n\t");
>> +       tsk->thread.mxu.xr[11] = reg_val;
>> +       asm volatile(".word     0x7008032e\n\t");
>> +       tsk->thread.mxu.xr[12] = reg_val;
>> +       asm volatile(".word     0x7008036e\n\t");
>> +       tsk->thread.mxu.xr[13] = reg_val;
>> +       asm volatile(".word     0x700803ae\n\t");
>> +       tsk->thread.mxu.xr[14] = reg_val;
>> +       asm volatile(".word     0x700803ee\n\t");
>> +       tsk->thread.mxu.xr[15] = reg_val;
>> +}
>> +
>> +static inline void __restore_mxu(void *tsk_void)
>> +{
>> +       struct task_struct *tsk = tsk_void;
>> +
>> +       register unsigned int reg_val asm("t0");
>> +
>> +       reg_val = tsk->thread.mxu.xr[0];
>> +       asm volatile(".word     0x7008042f\n\t"::"r"(reg_val));
>
>
> Same comment as for saving context - I don't think this is guaranteed to
> work.

Will this be a problem as code runs with preemption disabled (inside
switch_to)? If so it I will change.

>
>> +       reg_val = tsk->thread.mxu.xr[1];
>> +       asm volatile(".word     0x7008006f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[2];
>> +       asm volatile(".word     0x700800af\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[3];
>> +       asm volatile(".word     0x700800ef\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[4];
>> +       asm volatile(".word     0x7008012f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[5];
>> +       asm volatile(".word     0x7008016f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[6];
>> +       asm volatile(".word     0x700801af\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[7];
>> +       asm volatile(".word     0x700801ef\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[8];
>> +       asm volatile(".word     0x7008022f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[9];
>> +       asm volatile(".word     0x7008026f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[10];
>> +       asm volatile(".word     0x700802af\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[11];
>> +       asm volatile(".word     0x700802ef\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[12];
>> +       asm volatile(".word     0x7008032f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[13];
>> +       asm volatile(".word     0x7008036f\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[14];
>> +       asm volatile(".word     0x700803af\n\t"::"r"(reg_val));
>> +       reg_val = tsk->thread.mxu.xr[15];
>> +       asm volatile(".word     0x700803ef\n\t"::"r"(reg_val));
>> +}
>> +
>> +#define save_mxu(tsk)                                          \
>> +       do {                                                    \
>> +               if (cpu_has_mxu)                                \
>> +                       __save_mxu(tsk);                        \
>> +       } while (0)
>> +
>> +#define restore_mxu(tsk)                                       \
>> +       do {                                                    \
>> +               if (cpu_has_mxu)                                \
>> +                       __restore_mxu(tsk);                     \
>> +       } while (0)
>> +
>> +#define __get_mxu_regs(tsk)                                    \
>> +       ({                                                      \
>> +               if (tsk == current)                             \
>> +                       __save_mxu(current);                    \
>> +                                                               \
>> +               tsk->thread.mxu.xr;                             \
>> +       })
>> +
>> +#define __let_mxu_regs(tsk, regs)                              \
>> +       do {                                                    \
>> +               int i;                                          \
>> +               for (i = 0; i < NUM_MXU_REGS; i++)              \
>> +                       tsk->thread.mxu.xr[i] = regs[i];        \
>> +               if (tsk == current)                             \
>> +                       __save_mxu(current);                    \
>> +       } while (0)
>> +
>> +#endif /* _ASM_MXU_H */
>> diff --git a/arch/mips/include/asm/processor.h
>> b/arch/mips/include/asm/processor.h
>> index 7e78b62..a4def30 100644
>> --- a/arch/mips/include/asm/processor.h
>> +++ b/arch/mips/include/asm/processor.h
>> @@ -142,6 +142,11 @@ struct mips_dsp_state {
>>         unsigned int    dspcontrol;
>>  };
>>
>> +#define NUM_MXU_REGS 16
>> +struct xburst_mxu_state {
>> +       unsigned int xr[NUM_MXU_REGS];
>> +};
>> +
>>  #define INIT_CPUMASK { \
>>         {0,} \
>>  }
>> @@ -266,6 +271,10 @@ struct thread_struct {
>>         /* Saved state of the DSP ASE, if available. */
>>         struct mips_dsp_state dsp;
>>
>> +       unsigned int mxu_used;
>
>
> Why not a thread info flag (ie. 1 bit) rather than 4 bytes?

Okay. Will change.

>> +       /* Saved registers of Xburst MXU, if available. */
>> +       struct xburst_mxu_state mxu;
>> +
>>         /* Saved watch register state, if available. */
>>         union mips_watch_reg_state watch;
>>
>> @@ -330,6 +339,10 @@ struct thread_struct {
>>                 .dspr           = {0, },                        \
>>                 .dspcontrol     = 0,                            \
>>         },                                                      \
>> +       .mxu_used               = 0,                            \
>> +       .mxu                    = {                             \
>> +               .xr             = {0, },                        \
>> +       },                                                      \
>>         /*                                                      \
>>          * saved watch register stuff                           \
>>          */                                                     \
>> @@ -410,4 +423,10 @@ extern int mips_set_process_fp_mode(struct
>> task_struct *task,
>>  #define GET_FP_MODE(task)              mips_get_process_fp_mode(task)
>>  #define SET_FP_MODE(task,value)
>> mips_set_process_fp_mode(task, value)
>>
>> +extern int mips_enable_mxu_other_cpus(void);
>> +extern int mips_disable_mxu_other_cpus(void);
>> +
>> +#define ENABLE_MXU_OTHER_CPUS()         mips_enable_mxu_other_cpus()
>> +#define DISABLE_MXU_OTHER_CPUS()        mips_disable_mxu_other_cpus()
>> +
>>  #endif /* _ASM_PROCESSOR_H */
>> diff --git a/arch/mips/include/asm/switch_to.h
>> b/arch/mips/include/asm/switch_to.h
>> index ebb5c0f..112aff5 100644
>> --- a/arch/mips/include/asm/switch_to.h
>> +++ b/arch/mips/include/asm/switch_to.h
>> @@ -75,6 +75,43 @@ do { if (cpu_has_rw_llb) {
>> \
>>         }                                                               \
>>  } while (0)
>>
>> +static inline void handle_mxu_state(struct task_struct *prev,
>> +               struct task_struct *next)
>> +{
>> +       struct task_struct *thread = NULL;
>> +
>> +       if (prev->thread.mxu_used) {
>> +               if (mxu_used() == 1) {
>> +                       __save_mxu(prev);
>> +               } else {
>> +                       prev->thread.mxu_used = 0;
>
>
> This seems weird. If I understand correctly then if a thread uses MXU in one
> timeslice then runs some non-MXU code in its next timeslice its MXU context
> may be lost before any third timeslice. That sounds bad. Think about if you
> had a program that was in the middle of running some MXU-using algorithm
> when it happens to receive a signal, and the MXU code is interrupted by some
> non-MXU code for a while long enough for this path to be hit, then the
> signal handler returns to the MXU-using code & its context is corrupt. Or
> without signals the same thing could happen if you just happened to call
> some complex/long running non-MXU code in the middle of your MXU-using code.
> That sounds bad!

I am sorry that this confusion would have arised because of using
'mxu_used' in both the places (thread.mxu_used for stored state and
mxu_used() to get current MXU state from cpu).

>> +                       thread = prev;
>> +                       rcu_read_lock();
>> +                       while_each_thread(prev, thread) {
>> +                               thread->thread.mxu_used = 0;
>> +                       };
>> +                       rcu_read_unlock();
>
>
> I think this would need some commenting to explain what's going on & I'm not
> concinved it's correct, but also as mentioned earlier I'm not sure the
> kernel should be taking responsibility for synchronising MXU state across
> threads so this could possibly be removed.
>
>> +               }
>> +       } else {
>> +               if (mxu_used() == 1) {
>> +                       __save_mxu(prev);
>> +                       prev->thread.mxu_used = 1;
>> +                       thread = prev;
>> +                       rcu_read_lock();
>> +                       while_each_thread(prev, thread) {
>> +                               thread->thread.mxu_used = 1;
>> +                       };
>> +                       rcu_read_unlock();
>
>
> Likewise.
>
>> +               }
>> +       }
>> +       disable_mxu();
>> +
>> +       if (next->thread.mxu_used) {
>> +               __restore_mxu(next);
>> +               enable_mxu();
>> +       }
>> +}
>> +

In all the cases the MXU state is propogated to other threads in the
application.

> Another issue would be - should MXU be usable in signal handlers? If so then
> it needs to have context saved & restored around signals, as part of an
> extended context structure much like that used for MSA. If not then we
> probably need to document that restriction somewhere and check that anyone
> interested is OK with it. Ultimately that probably depends upon the goals
> here - if MXU were ever to be used for auto-vectorisation for example then
> it should probably gain that sigcontext save/restore code.

Compiler generating MXU instruction without inline asm code may not
happen in near future. For now that case can be ignored. I agree that
it should be documented.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] mips: Add MXU context switching support
  2016-07-05  9:34   ` Paul Burton
  (?)
  (?)
@ 2016-07-05 16:17   ` Andy Lutomirski
  -1 siblings, 0 replies; 13+ messages in thread
From: Andy Lutomirski @ 2016-07-05 16:17 UTC (permalink / raw)
  To: Paul Burton
  Cc: Alex Thorlton, zhaoxiu.zeng, Ingo Molnar, Markos Chandras,
	chenhc, Andrew Morton, Christoph Lameter, Ralf Baechle,
	Alexey Dobriyan, Zubair.Kakakhel, alex.smith,
	PrasannaKumar Muralidharan, Leonid Yegoshin, f.fainelli,
	Mateusz Guzik, Eric W. Biederman, Michal Hocko,
	Maciej W. Rozycki, Kees Cook, Linux MIPS Mailing List,
	Serge Hallyn, John Stultz, James Hogan, linux-kernel,
	Cyrill Gorcunov, David Daney, Jiri Slaby

On Jul 5, 2016 5:35 AM, "Paul Burton" <paul.burton@imgtec.com> wrote:
>
> Hi PrasannaKumar,
>
>
> On 25/06/16 13:14, PrasannaKumar Muralidharan wrote:
>>
>> From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
>>
>> This patch adds support for context switching Xburst MXU registers. The
>> registers are named xr0 to xr16. xr16 is the control register that can
>> be used to enable and disable MXU instruction set. Read and write to
>> these registers can be done without enabling MXU instruction set by user
>> space. Only when MXU instruction set is enabled any MXU instruction
>> (other than read or write to xr registers) can be done. xr0 is always 0.
>
>
> Do you have any examples of userland programs making use of MXU? They would be useful in allowing people to test this patch.
>
> How have you tested this?
>
>
>> Kernel does not know when MXU instruction is enabled or disabled. So
>> during context switch if MXU is enabled in xr16 register then MXU
>> registers are saved, restored when the task is run.
>
>
> I'm not convinced this is the right way to go. It seems complex & fragile vs the alternatives, the simplest of which could be to just always save & restore MXU context in kernels with MXU support. Is there a significant performance cost to just unconditionally saving & restoring the MXU context? That is after all what Ingenic's vendor kernel, which it looks like large parts of your patch are taken from, does.
>
>
>> When user space
>> application enables MXU, it is not reflected in other threads
>> immediately. So for convenience the applications can use prctl syscall
>> to let the MXU state propagate across threads running in different CPUs.
>
>
> Surely it wouldn't be reflected at all, since each thread has its own MXU context? Would you expect applications to actually want to enable MXU on one thread & make use of it from other already running threads? Off the top of my head I can't think of why that would be useful, so I'm wondering whether it would be better to just let each thread handle enabling MXU if it wants & leave the kernel out of it. If we just save & restore unconditionally then this becomes a non-issue anyway.
>

I don't know much about MIPS, but switching save/restore off depending
on a bit of *user* state sounds like a gaping security hole.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-07-05 16:18 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-25 12:14 [RFC] mips: Add MXU context switching support PrasannaKumar Muralidharan
2016-07-03 13:17 ` PrasannaKumar Muralidharan
2016-07-04 21:45 ` Florian Fainelli
2016-07-05  9:47   ` PrasannaKumar Muralidharan
2016-07-04 22:30 ` Maciej W. Rozycki
2016-07-04 22:30   ` Maciej W. Rozycki
2016-07-05 10:07   ` PrasannaKumar Muralidharan
2016-07-05 10:53     ` Maciej W. Rozycki
2016-07-05 10:53       ` Maciej W. Rozycki
2016-07-05  9:34 ` Paul Burton
2016-07-05  9:34   ` Paul Burton
2016-07-05 15:21   ` PrasannaKumar Muralidharan
2016-07-05 16:17   ` Andy Lutomirski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.