All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 00/10] xtensa: support coprocessors on SMP
@ 2022-04-21 10:10 Max Filippov
  2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
                   ` (9 more replies)
  0 siblings, 10 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Hello,

this series does a bunch of small cleanups around exception and
coprocessor handling code and adds coprocessors support in SMP
configurations.

Changes v1->v2:

- clean up exception handler prototypes
- merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB
- get rid of stack frame in coprocessor_flush
- document rules for coprocessor context management
- clean up context management from the LKMM point of view, introduce
  and document barriers
- support CPU hotplug

Max Filippov (10):
  xtensa: clean up function declarations in traps.c
  xtensa: clean up exception handler prototypes
  xtensa: clean up declarations in coprocessor.h
  xtensa: clean up excsave1 initialization
  xtensa: use callx0 opcode in fast_coprocessor
  xtensa: handle coprocessor exceptions in kernel mode
  xtensa: add xtensa_xsr macro
  xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB
  xtensa: get rid of stack frame in coprocessor_flush
  xtensa: support coprocessors on SMP

 arch/xtensa/include/asm/coprocessor.h |  11 +-
 arch/xtensa/include/asm/processor.h   |   7 +
 arch/xtensa/include/asm/thread_info.h |   7 +-
 arch/xtensa/include/asm/traps.h       |  40 +++--
 arch/xtensa/kernel/asm-offsets.c      |   8 +-
 arch/xtensa/kernel/coprocessor.S      | 230 +++++++++++++++-----------
 arch/xtensa/kernel/entry.S            |  12 +-
 arch/xtensa/kernel/process.c          | 112 ++++++++++---
 arch/xtensa/kernel/ptrace.c           |   3 +-
 arch/xtensa/kernel/s32c1i_selftest.c  |   7 +-
 arch/xtensa/kernel/signal.c           |   3 +-
 arch/xtensa/kernel/smp.c              |   7 +
 arch/xtensa/kernel/traps.c            |  69 ++++----
 13 files changed, 334 insertions(+), 182 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 01/10] xtensa: clean up function declarations in traps.c
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 02/10] xtensa: clean up exception handler prototypes Max Filippov
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Drop 'extern' from all function declarations and move those that need to
be visible from traps.c to traps.h. Add 'asmlinkage' to declarations of
fucntions defined in assembly. Add 'static' to declarations and
definitions only used locally. Add argument names in declarations.
Drop unused second argument from do_multihit and do_page_fault.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:

- move declarations to traps.h and add 'asmlinkage' annotations

 arch/xtensa/include/asm/traps.h | 18 +++++++++++--
 arch/xtensa/kernel/traps.c      | 46 ++++++++++++---------------------
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 6fa47cd8e02d..fc63217232a4 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -39,8 +39,22 @@ struct exc_table {
  *  void (*)(struct pt_regs *regs, unsigned long exccause);
  */
 extern void * __init trap_set_handler(int cause, void *handler);
-extern void do_unhandled(struct pt_regs *regs, unsigned long exccause);
-void fast_second_level_miss(void);
+
+asmlinkage void fast_illegal_instruction_user(void);
+asmlinkage void fast_syscall_user(void);
+asmlinkage void fast_alloca(void);
+asmlinkage void fast_unaligned(void);
+asmlinkage void fast_second_level_miss(void);
+asmlinkage void fast_store_prohibited(void);
+asmlinkage void fast_coprocessor(void);
+
+asmlinkage void kernel_exception(void);
+asmlinkage void user_exception(void);
+asmlinkage void system_call(struct pt_regs *regs);
+
+void do_IRQ(int hwirq, struct pt_regs *regs);
+void do_page_fault(struct pt_regs *regs);
+void do_unhandled(struct pt_regs *regs, unsigned long exccause);
 
 /* Initialize minimal exc_table structure sufficient for basic paging */
 static inline void __init early_trap_init(void)
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 515719c7e750..b6bb5911ec7f 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -48,25 +48,16 @@
  * Machine specific interrupt handlers
  */
 
-extern void kernel_exception(void);
-extern void user_exception(void);
-
-extern void fast_illegal_instruction_user(void);
-extern void fast_syscall_user(void);
-extern void fast_alloca(void);
-extern void fast_unaligned(void);
-extern void fast_second_level_miss(void);
-extern void fast_store_prohibited(void);
-extern void fast_coprocessor(void);
-
-extern void do_illegal_instruction (struct pt_regs*);
-extern void do_interrupt (struct pt_regs*);
-extern void do_nmi(struct pt_regs *);
-extern void do_unaligned_user (struct pt_regs*);
-extern void do_multihit (struct pt_regs*, unsigned long);
-extern void do_page_fault (struct pt_regs*, unsigned long);
-extern void do_debug (struct pt_regs*);
-extern void system_call (struct pt_regs*);
+static void do_illegal_instruction(struct pt_regs *regs);
+static void do_interrupt(struct pt_regs *regs);
+#if XTENSA_FAKE_NMI
+static void do_nmi(struct pt_regs *regs);
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+static void do_unaligned_user(struct pt_regs *regs);
+#endif
+static void do_multihit(struct pt_regs *regs);
+static void do_debug(struct pt_regs *regs);
 
 /*
  * The vector table must be preceded by a save area (which
@@ -197,7 +188,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
  * Multi-hit exception. This if fatal!
  */
 
-void do_multihit(struct pt_regs *regs, unsigned long exccause)
+static void do_multihit(struct pt_regs *regs)
 {
 	die("Caught multihit exception", regs, SIGKILL);
 }
@@ -206,8 +197,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
  * IRQ handler.
  */
 
-extern void do_IRQ(int, struct pt_regs *);
-
 #if XTENSA_FAKE_NMI
 
 #define IS_POW2(v) (((v) & ((v) - 1)) == 0)
@@ -240,7 +229,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
 
 DEFINE_PER_CPU(unsigned long, nmi_count);
 
-void do_nmi(struct pt_regs *regs)
+static void do_nmi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
@@ -253,7 +242,7 @@ void do_nmi(struct pt_regs *regs)
 }
 #endif
 
-void do_interrupt(struct pt_regs *regs)
+static void do_interrupt(struct pt_regs *regs)
 {
 	static const unsigned int_level_mask[] = {
 		0,
@@ -303,8 +292,7 @@ void do_interrupt(struct pt_regs *regs)
  * Illegal instruction. Fatal if in kernel space.
  */
 
-void
-do_illegal_instruction(struct pt_regs *regs)
+static void do_illegal_instruction(struct pt_regs *regs)
 {
 	__die_if_kernel("Illegal instruction in kernel", regs, SIGKILL);
 
@@ -324,8 +312,7 @@ do_illegal_instruction(struct pt_regs *regs)
  */
 
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-void
-do_unaligned_user (struct pt_regs *regs)
+static void do_unaligned_user(struct pt_regs *regs)
 {
 	__die_if_kernel("Unhandled unaligned exception in kernel",
 			regs, SIGKILL);
@@ -346,8 +333,7 @@ do_unaligned_user (struct pt_regs *regs)
  * breakpoint structures to debug registers intact, so that
  * DEBUGCAUSE.DBNUM could be used in case of data breakpoint hit.
  */
-void
-do_debug(struct pt_regs *regs)
+static void do_debug(struct pt_regs *regs)
 {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	int ret = check_hw_breakpoint(regs);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 02/10] xtensa: clean up exception handler prototypes
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
  2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h Max Filippov
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Exception handlers are currently passed as void pointers because they
may have one or two parameters. Only two handlers uses the second
parameter and it is available in the struct pt_regs anyway. Make all
handlers have only one parameter, introduce xtensa_exception_handler
type for handlers and use it in trap_set_handler.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:

- new patch

 arch/xtensa/include/asm/traps.h      | 14 ++++++--------
 arch/xtensa/kernel/s32c1i_selftest.c |  7 +++----
 arch/xtensa/kernel/traps.c           |  7 ++++---
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index fc63217232a4..bfdb0af61b07 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -12,6 +12,8 @@
 
 #include <asm/ptrace.h>
 
+typedef void xtensa_exception_handler(struct pt_regs *regs);
+
 /*
  * Per-CPU exception handling data structure.
  * EXCSAVE1 points to it.
@@ -30,15 +32,11 @@ struct exc_table {
 	/* Fast kernel exception handlers */
 	void *fast_kernel_handler[EXCCAUSE_N];
 	/* Default C-Handlers */
-	void *default_handler[EXCCAUSE_N];
+	xtensa_exception_handler *default_handler[EXCCAUSE_N];
 };
 
-/*
- * handler must be either of the following:
- *  void (*)(struct pt_regs *regs);
- *  void (*)(struct pt_regs *regs, unsigned long exccause);
- */
-extern void * __init trap_set_handler(int cause, void *handler);
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler);
 
 asmlinkage void fast_illegal_instruction_user(void);
 asmlinkage void fast_syscall_user(void);
@@ -54,7 +52,7 @@ asmlinkage void system_call(struct pt_regs *regs);
 
 void do_IRQ(int hwirq, struct pt_regs *regs);
 void do_page_fault(struct pt_regs *regs);
-void do_unhandled(struct pt_regs *regs, unsigned long exccause);
+void do_unhandled(struct pt_regs *regs);
 
 /* Initialize minimal exc_table structure sufficient for basic paging */
 static inline void __init early_trap_init(void)
diff --git a/arch/xtensa/kernel/s32c1i_selftest.c b/arch/xtensa/kernel/s32c1i_selftest.c
index 07e56e3a9a8b..8362388c8719 100644
--- a/arch/xtensa/kernel/s32c1i_selftest.c
+++ b/arch/xtensa/kernel/s32c1i_selftest.c
@@ -40,14 +40,13 @@ static inline int probed_compare_swap(int *v, int cmp, int set)
 
 /* Handle probed exception */
 
-static void __init do_probed_exception(struct pt_regs *regs,
-				       unsigned long exccause)
+static void __init do_probed_exception(struct pt_regs *regs)
 {
 	if (regs->pc == rcw_probe_pc) {	/* exception on s32c1i ? */
 		regs->pc += 3;		/* skip the s32c1i instruction */
-		rcw_exc = exccause;
+		rcw_exc = regs->exccause;
 	} else {
-		do_unhandled(regs, exccause);
+		do_unhandled(regs);
 	}
 }
 
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index b6bb5911ec7f..d6b1a0c3e319 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -170,7 +170,7 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err)
  * Unhandled Exceptions. Kill user task or panic if in kernel space.
  */
 
-void do_unhandled(struct pt_regs *regs, unsigned long exccause)
+void do_unhandled(struct pt_regs *regs)
 {
 	__die_if_kernel("Caught unhandled exception - should not happen",
 			regs, SIGKILL);
@@ -180,7 +180,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
 			    "(pid = %d, pc = %#010lx) - should not happen\n"
 			    "\tEXCCAUSE is %ld\n",
 			    current->comm, task_pid_nr(current), regs->pc,
-			    exccause);
+			    regs->exccause);
 	force_sig(SIGILL);
 }
 
@@ -360,7 +360,8 @@ static void do_debug(struct pt_regs *regs)
 
 /* Set exception C handler - for temporary use when probing exceptions */
 
-void * __init trap_set_handler(int cause, void *handler)
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler)
 {
 	void *previous = per_cpu(exc_table, 0).default_handler[cause];
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
  2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
  2022-04-21 10:10 ` [PATCH v2 02/10] xtensa: clean up exception handler prototypes Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 04/10] xtensa: clean up excsave1 initialization Max Filippov
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Drop 'extern' from all function declarations. Add parameter names in
declarations.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/coprocessor.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index 0fbe2a740b8d..a360efced7e7 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -143,10 +143,9 @@ typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
 	__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
 
 extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
-extern void coprocessor_flush(struct thread_info*, int);
-
-extern void coprocessor_release_all(struct thread_info*);
-extern void coprocessor_flush_all(struct thread_info*);
+void coprocessor_flush(struct thread_info *ti, int cp_index);
+void coprocessor_release_all(struct thread_info *ti);
+void coprocessor_flush_all(struct thread_info *ti);
 
 #endif	/* XTENSA_HAVE_COPROCESSORS */
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 04/10] xtensa: clean up excsave1 initialization
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (2 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Use xtensa_set_sr instead of inline assembly.
Rename local variable exc_table in early_trap_init to avoid conflict
with per-CPU variable of the same name.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/traps.h | 4 ++--
 arch/xtensa/kernel/traps.c      | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index bfdb0af61b07..514376eff58c 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -57,11 +57,11 @@ void do_unhandled(struct pt_regs *regs);
 /* Initialize minimal exc_table structure sufficient for basic paging */
 static inline void __init early_trap_init(void)
 {
-	static struct exc_table exc_table __initdata = {
+	static struct exc_table init_exc_table __initdata = {
 		.fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
 			fast_second_level_miss,
 	};
-	__asm__ __volatile__("wsr  %0, excsave1\n" : : "a" (&exc_table));
+	xtensa_set_sr(&init_exc_table, excsave1);
 }
 
 void secondary_trap_init(void);
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index d6b1a0c3e319..95903f25e523 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -372,8 +372,7 @@ __init trap_set_handler(int cause, xtensa_exception_handler *handler)
 
 static void trap_init_excsave(void)
 {
-	unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table);
-	__asm__ __volatile__("wsr  %0, excsave1\n" : : "a" (excsave1));
+	xtensa_set_sr(this_cpu_ptr(&exc_table), excsave1);
 }
 
 static void trap_init_debug(void)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (3 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 04/10] xtensa: clean up excsave1 initialization Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode Max Filippov
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Instead of emulating call0 in fast_coprocessor use that opcode directly.
Use 'ret' instead of 'jx a0'.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/coprocessor.S | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index c7b9f12896f2..8bcbabbff38a 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -30,7 +30,7 @@
 		.align 4;						\
 	.Lsave_cp_regs_cp##x:						\
 		xchal_cp##x##_store a2 a3 a4 a5 a6;			\
-		jx	a0;						\
+		ret;							\
 	.endif
 
 #define SAVE_CP_REGS_TAB(x)						\
@@ -47,7 +47,7 @@
 		.align 4;						\
 	.Lload_cp_regs_cp##x:						\
 		xchal_cp##x##_load a2 a3 a4 a5 a6;			\
-		jx	a0;						\
+		ret;							\
 	.endif
 
 #define LOAD_CP_REGS_TAB(x)						\
@@ -163,21 +163,20 @@ ENTRY(fast_coprocessor)
 	s32i	a5, a4, THREAD_CPENABLE
 
 	/*
-	 * Get context save area and 'call' save routine. 
+	 * Get context save area and call save routine.
 	 * (a4 still holds previous owner (thread_info), a3 CP number)
 	 */
 
 	movi	a5, .Lsave_cp_regs_jump_table
-	movi	a0, 2f			# a0: 'return' address
 	addx8	a3, a3, a5		# a3: coprocessor number
 	l32i	a2, a3, 4		# a2: xtregs offset
 	l32i	a3, a3, 0		# a3: jump address
 	add	a2, a2, a4
-	jx	a3
+	callx0	a3
 
 	/* Note that only a0 and a1 were preserved. */
 
-2:	rsr	a3, exccause
+	rsr	a3, exccause
 	addi	a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
 	movi	a0, coprocessor_owner
 	addx4	a0, a3, a0
@@ -187,19 +186,18 @@ ENTRY(fast_coprocessor)
 1:	GET_THREAD_INFO (a4, a1)
 	s32i	a4, a0, 0
 
-	/* Get context save area and 'call' load routine. */
+	/* Get context save area and call load routine. */
 
 	movi	a5, .Lload_cp_regs_jump_table
-	movi	a0, 1f
 	addx8	a3, a3, a5
 	l32i	a2, a3, 4		# a2: xtregs offset
 	l32i	a3, a3, 0		# a3: jump address
 	add	a2, a2, a4
-	jx	a3
+	callx0	a3
 
 	/* Restore all registers and return from exception handler. */
 
-1:	l32i	a6, a1, PT_AREG6
+	l32i	a6, a1, PT_AREG6
 	l32i	a5, a1, PT_AREG5
 	l32i	a4, a1, PT_AREG4
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (4 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 07/10] xtensa: add xtensa_xsr macro Max Filippov
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

In order to let drivers use xtensa coprocessors on behalf of the calling
process the kernel must handle coprocessor exceptions from the kernel
mode the same way as from the user mode.

This is not sufficient to allow using coprocessors transparently in IRQ
or softirq context. Should such users exist they must be aware of the
context and do the right thing, e.g. preserve the coprocessor state and
resore it after use.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 95903f25e523..62c497605128 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -69,7 +69,7 @@ static void do_debug(struct pt_regs *regs);
 #define USER		0x02
 
 #define COPROCESSOR(x)							\
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor }
 
 typedef struct {
 	int cause;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 07/10] xtensa: add xtensa_xsr macro
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (5 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB Max Filippov
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

xtensa_xsr does the XSR instruction for the specified special register.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/processor.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 4489a27d527a..76bc63127c66 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -246,6 +246,13 @@ extern unsigned long __get_wchan(struct task_struct *p);
 	 v; \
 	 })
 
+#define xtensa_xsr(x, sr) \
+	({ \
+	 unsigned int __v__ = (unsigned int)(x); \
+	 __asm__ __volatile__ ("xsr %0, " __stringify(sr) : "+a"(__v__)); \
+	 __v__; \
+	 })
+
 #if XCHAL_HAVE_EXTERN_REGS
 
 static inline void set_er(unsigned long value, unsigned long addr)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (6 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 07/10] xtensa: add xtensa_xsr macro Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush Max Filippov
  2022-04-21 10:10 ` [PATCH v2 10/10] xtensa: support coprocessors on SMP Max Filippov
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Both tables share the same offset field but the different function
pointers. Merge them into single table with 3-element entries to reduce
code and data duplication.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:

- new patch

 arch/xtensa/kernel/coprocessor.S | 85 ++++++++++++++------------------
 1 file changed, 37 insertions(+), 48 deletions(-)

diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 8bcbabbff38a..af11ddaa8c5f 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -33,15 +33,6 @@
 		ret;							\
 	.endif
 
-#define SAVE_CP_REGS_TAB(x)						\
-	.if XTENSA_HAVE_COPROCESSOR(x);					\
-		.long .Lsave_cp_regs_cp##x;				\
-	.else;								\
-		.long 0;						\
-	.endif;								\
-	.long THREAD_XTREGS_CP##x
-
-
 #define LOAD_CP_REGS(x)							\
 	.if XTENSA_HAVE_COPROCESSOR(x);					\
 		.align 4;						\
@@ -50,14 +41,19 @@
 		ret;							\
 	.endif
 
-#define LOAD_CP_REGS_TAB(x)						\
+#define CP_REGS_TAB(x)							\
 	.if XTENSA_HAVE_COPROCESSOR(x);					\
+		.long .Lsave_cp_regs_cp##x;				\
 		.long .Lload_cp_regs_cp##x;				\
 	.else;								\
-		.long 0;						\
+		.long 0, 0;						\
 	.endif;								\
 	.long THREAD_XTREGS_CP##x
 
+#define CP_REGS_TAB_SAVE 0
+#define CP_REGS_TAB_LOAD 4
+#define CP_REGS_TAB_OFFSET 8
+
 	__XTENSA_HANDLER
 
 	SAVE_CP_REGS(0)
@@ -79,25 +75,15 @@
 	LOAD_CP_REGS(7)
 
 	.align 4
-.Lsave_cp_regs_jump_table:
-	SAVE_CP_REGS_TAB(0)
-	SAVE_CP_REGS_TAB(1)
-	SAVE_CP_REGS_TAB(2)
-	SAVE_CP_REGS_TAB(3)
-	SAVE_CP_REGS_TAB(4)
-	SAVE_CP_REGS_TAB(5)
-	SAVE_CP_REGS_TAB(6)
-	SAVE_CP_REGS_TAB(7)
-
-.Lload_cp_regs_jump_table:
-	LOAD_CP_REGS_TAB(0)
-	LOAD_CP_REGS_TAB(1)
-	LOAD_CP_REGS_TAB(2)
-	LOAD_CP_REGS_TAB(3)
-	LOAD_CP_REGS_TAB(4)
-	LOAD_CP_REGS_TAB(5)
-	LOAD_CP_REGS_TAB(6)
-	LOAD_CP_REGS_TAB(7)
+.Lcp_regs_jump_table:
+	CP_REGS_TAB(0)
+	CP_REGS_TAB(1)
+	CP_REGS_TAB(2)
+	CP_REGS_TAB(3)
+	CP_REGS_TAB(4)
+	CP_REGS_TAB(5)
+	CP_REGS_TAB(6)
+	CP_REGS_TAB(7)
 
 /*
  * Entry condition:
@@ -125,13 +111,12 @@ ENTRY(fast_coprocessor)
 	rsr	a2, depc
 	s32i	a2, a1, PT_AREG2
 
-	/*
-	 * The hal macros require up to 4 temporary registers. We use a3..a6.
-	 */
+	/* The hal macros require up to 4 temporary registers. We use a3..a6. */
 
 	s32i	a4, a1, PT_AREG4
 	s32i	a5, a1, PT_AREG5
 	s32i	a6, a1, PT_AREG6
+	s32i	a7, a1, PT_AREG7
 
 	/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
 
@@ -148,6 +133,12 @@ ENTRY(fast_coprocessor)
 	wsr	a0, cpenable
 	rsync
 
+	/* Get coprocessor save/load table entry (a7). */
+
+	movi	a7, .Lcp_regs_jump_table
+	addx8	a7, a3, a7
+	addx4	a7, a3, a7
+
 	/* Retrieve previous owner. (a3 still holds CP number) */
 
 	movi	a0, coprocessor_owner	# list of owners
@@ -167,10 +158,8 @@ ENTRY(fast_coprocessor)
 	 * (a4 still holds previous owner (thread_info), a3 CP number)
 	 */
 
-	movi	a5, .Lsave_cp_regs_jump_table
-	addx8	a3, a3, a5		# a3: coprocessor number
-	l32i	a2, a3, 4		# a2: xtregs offset
-	l32i	a3, a3, 0		# a3: jump address
+	l32i	a2, a7, CP_REGS_TAB_OFFSET
+	l32i	a3, a7, CP_REGS_TAB_SAVE
 	add	a2, a2, a4
 	callx0	a3
 
@@ -188,15 +177,14 @@ ENTRY(fast_coprocessor)
 
 	/* Get context save area and call load routine. */
 
-	movi	a5, .Lload_cp_regs_jump_table
-	addx8	a3, a3, a5
-	l32i	a2, a3, 4		# a2: xtregs offset
-	l32i	a3, a3, 0		# a3: jump address
+	l32i	a2, a7, CP_REGS_TAB_OFFSET
+	l32i	a3, a7, CP_REGS_TAB_LOAD
 	add	a2, a2, a4
 	callx0	a3
 
 	/* Restore all registers and return from exception handler. */
 
+	l32i	a7, a1, PT_AREG7
 	l32i	a6, a1, PT_AREG6
 	l32i	a5, a1, PT_AREG5
 	l32i	a4, a1, PT_AREG4
@@ -232,13 +220,14 @@ ENTRY(coprocessor_flush)
 	abi_entry(4)
 
 	s32i	a0, a1, 0
-	movi	a0, .Lsave_cp_regs_jump_table
-	addx8	a3, a3, a0
-	l32i	a4, a3, 4
-	l32i	a3, a3, 0
-	add	a2, a2, a4
-	beqz	a3, 1f
-	callx0	a3
+	movi	a4, .Lcp_regs_jump_table
+	addx8	a4, a3, a4
+	addx4	a3, a3, a4
+	l32i	a4, a3, CP_REGS_TAB_SAVE
+	beqz	a4, 1f
+	l32i	a3, a3, CP_REGS_TAB_OFFSET
+	add	a2, a2, a3
+	callx0	a4
 1:	l32i	a0, a1, 0
 
 	abi_ret(4)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (7 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  2022-04-21 10:10 ` [PATCH v2 10/10] xtensa: support coprocessors on SMP Max Filippov
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

coprocessor_flush is an ordinary function, it can use all registers.
Don't reserve stack frame for it and use a7 to preserve a0 around the
context saving call.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:

- new patch

 arch/xtensa/kernel/coprocessor.S | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index af11ddaa8c5f..95412409c49e 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -216,10 +216,8 @@ ENDPROC(fast_coprocessor)
 
 ENTRY(coprocessor_flush)
 
-	/* reserve 4 bytes on stack to save a0 */
-	abi_entry(4)
+	abi_entry_default
 
-	s32i	a0, a1, 0
 	movi	a4, .Lcp_regs_jump_table
 	addx8	a4, a3, a4
 	addx4	a3, a3, a4
@@ -227,10 +225,11 @@ ENTRY(coprocessor_flush)
 	beqz	a4, 1f
 	l32i	a3, a3, CP_REGS_TAB_OFFSET
 	add	a2, a2, a3
+	mov	a7, a0
 	callx0	a4
-1:	l32i	a0, a1, 0
-
-	abi_ret(4)
+	mov	a0, a7
+1:
+	abi_ret_default
 
 ENDPROC(coprocessor_flush)
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 10/10] xtensa: support coprocessors on SMP
  2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
                   ` (8 preceding siblings ...)
  2022-04-21 10:10 ` [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
  9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.

Make coprocessor_owner array per-CPU and move it to struct exc_table for
easy access from the fast_coprocessor exception handler. Allow task to
have live coprocessors only on single CPU, record this CPU number in the
struct thread_info::cp_owner_cpu. Change struct thread_info::cpenable
meaning to be 'coprocessors live on cp_owner_cpu'.
Introduce C-level coprocessor exception handler that flushes and
releases live coprocessors of the task taking 'coprocessor disabled'
exception and call it from the fast_coprocessor handler when the task
has live coprocessors on other CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending IPI to the cp_owner_cpu. Add
function coprocessor_flush_release_all to do flush followed by release
atomically. Add function local_coprocessors_flush_release_all to flush
and release all coprocessors on the local CPU and use it to flush
coprocessor contexts from the CPU that goes offline.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:

- document rules for coprocessor context management
- clean up context management from the LKMM point of view, introduce
  and document barriers
- support CPU hotplug

 arch/xtensa/include/asm/coprocessor.h |   4 +-
 arch/xtensa/include/asm/thread_info.h |   7 +-
 arch/xtensa/include/asm/traps.h       |   6 ++
 arch/xtensa/kernel/asm-offsets.c      |   8 +-
 arch/xtensa/kernel/coprocessor.S      | 122 +++++++++++++++++++-------
 arch/xtensa/kernel/entry.S            |  12 ++-
 arch/xtensa/kernel/process.c          | 112 +++++++++++++++++------
 arch/xtensa/kernel/ptrace.c           |   3 +-
 arch/xtensa/kernel/signal.c           |   3 +-
 arch/xtensa/kernel/smp.c              |   7 ++
 arch/xtensa/kernel/traps.c            |  13 ++-
 11 files changed, 230 insertions(+), 67 deletions(-)

diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index a360efced7e7..3b1a0d5d2169 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -142,10 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
 typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
 	__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
 
-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
+struct thread_info;
 void coprocessor_flush(struct thread_info *ti, int cp_index);
 void coprocessor_release_all(struct thread_info *ti);
 void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);
+void local_coprocessors_flush_release_all(void);
 
 #endif	/* XTENSA_HAVE_COPROCESSORS */
 
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index f6fcbba1d02f..52974317a6b6 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -52,12 +52,17 @@ struct thread_info {
 	__u32			cpu;		/* current CPU */
 	__s32			preempt_count;	/* 0 => preemptable,< 0 => BUG*/
 
-	unsigned long		cpenable;
 #if XCHAL_HAVE_EXCLUSIVE
 	/* result of the most recent exclusive store */
 	unsigned long		atomctl8;
 #endif
 
+	/*
+	 * If i-th bit is set then coprocessor state is loaded into the
+	 * coprocessor i on CPU cp_owner_cpu.
+	 */
+	unsigned long		cpenable;
+	u32			cp_owner_cpu;
 	/* Allocate storage for extra user states and coprocessor states. */
 #if XTENSA_HAVE_COPROCESSORS
 	xtregs_coprocessor_t	xtregs_cp;
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 514376eff58c..6f74ccc0c7ea 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -27,6 +27,10 @@ struct exc_table {
 	void *fixup;
 	/* For passing a parameter to fixup */
 	void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+	/* Pointers to owner struct thread_info */
+	struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
 	/* Fast user exception handlers */
 	void *fast_user_handler[EXCCAUSE_N];
 	/* Fast kernel exception handlers */
@@ -35,6 +39,8 @@ struct exc_table {
 	xtensa_exception_handler *default_handler[EXCCAUSE_N];
 };
 
+DECLARE_PER_CPU(struct exc_table, exc_table);
+
 xtensa_exception_handler *
 __init trap_set_handler(int cause, xtensa_exception_handler *handler);
 
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 37278e2785fb..e3b9cf4c2289 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -91,10 +91,12 @@ int main(void)
 	/* struct thread_info (offset from start_struct) */
 	DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
 	DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
-	DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
 #if XCHAL_HAVE_EXCLUSIVE
 	DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
 #endif
+	DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+	DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+	DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
 #if XTENSA_HAVE_COPROCESSORS
 	DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
 	DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,6 +139,10 @@ int main(void)
 	DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
 	DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
 	DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+	DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+	       offsetof(struct exc_table, coprocessor_owner));
+#endif
 	DEFINE(EXC_TABLE_FAST_USER,
 	       offsetof(struct exc_table, fast_user_handler));
 	DEFINE(EXC_TABLE_FAST_KERNEL,
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 95412409c49e..ef33e76e07d8 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -19,6 +19,26 @@
 #include <asm/current.h>
 #include <asm/regs.h>
 
+/*
+ * Rules for coprocessor state manipulation on SMP:
+ *
+ * - a task may have live coprocessors only on one CPU.
+ *
+ * - whether coprocessor context of task T is live on some CPU is
+ *   denoted by T's thread_info->cpenable.
+ *
+ * - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
+ *   is valid in the T's thread_info. Zero thread_info->cpenable means that
+ *   coprocessor context is valid in the T's thread_info.
+ *
+ * - if a coprocessor context of task T is live on CPU X, only CPU X changes
+ *   T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
+ *   This is done by making sure that for the task T with live coprocessor
+ *   on CPU X cpenable SR is 0 when T runs on any other CPU Y.
+ *   When fast_coprocessor exception is taken on CPU Y it goes to the
+ *   C-level do_coprocessor that uses IPI to make CPU X flush T's coprocessors.
+ */
+
 #if XTENSA_HAVE_COPROCESSORS
 
 /*
@@ -101,9 +121,37 @@
 
 ENTRY(fast_coprocessor)
 
+	s32i	a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+	/*
+	 * Check if any coprocessor context is live on another CPU
+	 * and if so go through the C-level coprocessor exception handler
+	 * to flush it to memory.
+	 */
+	GET_THREAD_INFO (a0, a2)
+	l32i	a3, a0, THREAD_CPENABLE
+	beqz	a3, .Lload_local
+
+	/*
+	 * Pairs with smp_wmb in local_coprocessor_release_all
+	 * and with both memws below.
+	 */
+	memw
+	l32i	a3, a0, THREAD_CPU
+	l32i	a0, a0, THREAD_CP_OWNER_CPU
+	beq	a0, a3, .Lload_local
+
+	rsr	a0, ps
+	l32i	a3, a2, PT_AREG3
+	bbci.l	a0, PS_UM_BIT, 1f
+	call0	user_exception
+1:	call0	kernel_exception
+#endif
+
 	/* Save remaining registers a1-a3 and SAR */
 
-	s32i	a3, a2, PT_AREG3
+.Lload_local:
 	rsr	a3, sar
 	s32i	a1, a2, PT_AREG1
 	s32i	a3, a2, PT_SAR
@@ -117,6 +165,9 @@ ENTRY(fast_coprocessor)
 	s32i	a5, a1, PT_AREG5
 	s32i	a6, a1, PT_AREG6
 	s32i	a7, a1, PT_AREG7
+	s32i	a8, a1, PT_AREG8
+	s32i	a9, a1, PT_AREG9
+	s32i	a10, a1, PT_AREG10
 
 	/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
 
@@ -139,51 +190,66 @@ ENTRY(fast_coprocessor)
 	addx8	a7, a3, a7
 	addx4	a7, a3, a7
 
-	/* Retrieve previous owner. (a3 still holds CP number) */
+	/* Retrieve previous owner (a8). */
 
-	movi	a0, coprocessor_owner	# list of owners
+	rsr	a0, excsave1		# exc_table
 	addx4	a0, a3, a0		# entry for CP
-	l32i	a4, a0, 0
+	l32i	a8, a0, EXC_TABLE_COPROCESSOR_OWNER
 
-	beqz	a4, 1f			# skip 'save' if no previous owner
+	/* Set new owner (a9). */
 
-	/* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
+	GET_THREAD_INFO (a9, a1)
+	l32i	a4, a9, THREAD_CPU
+	s32i	a9, a0, EXC_TABLE_COPROCESSOR_OWNER
+	s32i	a4, a9, THREAD_CP_OWNER_CPU
 
-	l32i	a5, a4, THREAD_CPENABLE
-	xor	a5, a5, a2		# (1 << cp-id) still in a2
-	s32i	a5, a4, THREAD_CPENABLE
+	/*
+	 * Enable coprocessor for the new owner. (a2 = 1 << CP number)
+	 * This can be done before loading context into the coprocessor.
+	 */
+	l32i	a4, a9, THREAD_CPENABLE
+	or	a4, a4, a2
 
 	/*
-	 * Get context save area and call save routine.
-	 * (a4 still holds previous owner (thread_info), a3 CP number)
+	 * Make sure THREAD_CP_OWNER_CPU is in memory before updating
+	 * THREAD_CPENABLE
 	 */
+	memw				# (2)
+	s32i	a4, a9, THREAD_CPENABLE
 
-	l32i	a2, a7, CP_REGS_TAB_OFFSET
-	l32i	a3, a7, CP_REGS_TAB_SAVE
-	add	a2, a2, a4
-	callx0	a3
+	beqz	a8, 1f			# skip 'save' if no previous owner
 
-	/* Note that only a0 and a1 were preserved. */
+	/* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
 
-	rsr	a3, exccause
-	addi	a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
-	movi	a0, coprocessor_owner
-	addx4	a0, a3, a0
+	l32i	a10, a8, THREAD_CPENABLE
+	xor	a10, a10, a2
 
-	/* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
+	/* Get context save area and call save routine. */
 
-1:	GET_THREAD_INFO (a4, a1)
-	s32i	a4, a0, 0
+	l32i	a2, a7, CP_REGS_TAB_OFFSET
+	l32i	a3, a7, CP_REGS_TAB_SAVE
+	add	a2, a2, a8
+	callx0	a3
 
+	/*
+	 * Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
+	 * before updating THREAD_CPENABLE
+	 */
+	memw				# (3)
+	s32i	a10, a8, THREAD_CPENABLE
+1:
 	/* Get context save area and call load routine. */
 
 	l32i	a2, a7, CP_REGS_TAB_OFFSET
 	l32i	a3, a7, CP_REGS_TAB_LOAD
-	add	a2, a2, a4
+	add	a2, a2, a9
 	callx0	a3
 
 	/* Restore all registers and return from exception handler. */
 
+	l32i	a10, a1, PT_AREG10
+	l32i	a9, a1, PT_AREG9
+	l32i	a8, a1, PT_AREG8
 	l32i	a7, a1, PT_AREG7
 	l32i	a6, a1, PT_AREG6
 	l32i	a5, a1, PT_AREG5
@@ -233,12 +299,4 @@ ENTRY(coprocessor_flush)
 
 ENDPROC(coprocessor_flush)
 
-	.data
-
-ENTRY(coprocessor_owner)
-
-	.fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
 #endif /* XTENSA_HAVE_COPROCESSORS */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index f2c789a5a92a..3255d4f61844 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -2071,8 +2071,16 @@ ENTRY(_switch_to)
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
 	l32i	a3, a5, THREAD_CPENABLE
-	xsr	a3, cpenable
-	s32i	a3, a4, THREAD_CPENABLE
+#ifdef CONFIG_SMP
+	beqz	a3, 1f
+	memw			# pairs with memw (2) in fast_coprocessor
+	l32i	a6, a5, THREAD_CP_OWNER_CPU
+	l32i	a7, a5, THREAD_CPU
+	beq	a6, a7, 1f	# load 0 into CPENABLE if current CPU is not the owner
+	movi	a3, 0
+1:
+#endif
+	wsr	a3, cpenable
 #endif
 
 #if XCHAL_HAVE_EXCLUSIVE
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index e8bfbca5f001..7e38292dd07a 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/regs.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
 
 extern void ret_from_fork(void);
 extern void ret_from_kernel_thread(void);
@@ -63,52 +64,114 @@ EXPORT_SYMBOL(__stack_chk_guard);
 
 #if XTENSA_HAVE_COPROCESSORS
 
-void coprocessor_release_all(struct thread_info *ti)
+void local_coprocessors_flush_release_all(void)
 {
-	unsigned long cpenable;
-	int i;
+	struct thread_info **coprocessor_owner;
+	struct thread_info *unique_owner[XCHAL_CP_MAX];
+	int n = 0;
+	int i, j;
 
-	/* Make sure we don't switch tasks during this operation. */
+	coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+	xtensa_set_sr(XCHAL_CP_MASK, cpenable);
 
-	preempt_disable();
+	for (i = 0; i < XCHAL_CP_MAX; i++) {
+		struct thread_info *ti = coprocessor_owner[i];
 
-	/* Walk through all cp owners and release it for the requested one. */
+		if (ti) {
+			coprocessor_flush(ti, i);
 
-	cpenable = ti->cpenable;
+			for (j = 0; j < n; j++)
+				if (unique_owner[j] == ti)
+					break;
+			if (j == n)
+				unique_owner[n++] = ti;
 
-	for (i = 0; i < XCHAL_CP_MAX; i++) {
-		if (coprocessor_owner[i] == ti) {
-			coprocessor_owner[i] = 0;
-			cpenable &= ~(1 << i);
+			coprocessor_owner[i] = NULL;
 		}
 	}
+	for (i = 0; i < n; i++) {
+		/* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+		smp_wmb();
+		unique_owner[i]->cpenable = 0;
+	}
+	xtensa_set_sr(0, cpenable);
+}
 
-	ti->cpenable = cpenable;
+static void local_coprocessor_release_all(void *info)
+{
+	struct thread_info *ti = info;
+	struct thread_info **coprocessor_owner;
+	int i;
+
+	coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+
+	/* Walk through all cp owners and release it for the requested one. */
+
+	for (i = 0; i < XCHAL_CP_MAX; i++) {
+		if (coprocessor_owner[i] == ti)
+			coprocessor_owner[i] = NULL;
+	}
+	/* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+	smp_wmb();
+	ti->cpenable = 0;
 	if (ti == current_thread_info())
 		xtensa_set_sr(0, cpenable);
+}
 
-	preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+	if (ti->cpenable) {
+		/* pairs with memw (2) in fast_coprocessor */
+		smp_rmb();
+		smp_call_function_single(ti->cp_owner_cpu,
+					 local_coprocessor_release_all,
+					 ti, true);
+	}
 }
 
-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
 {
-	unsigned long cpenable, old_cpenable;
+	struct thread_info *ti = info;
+	struct thread_info **coprocessor_owner;
+	unsigned long old_cpenable;
 	int i;
 
-	preempt_disable();
-
-	old_cpenable = xtensa_get_sr(cpenable);
-	cpenable = ti->cpenable;
-	xtensa_set_sr(cpenable, cpenable);
+	coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+	old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
 
 	for (i = 0; i < XCHAL_CP_MAX; i++) {
-		if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+		if (coprocessor_owner[i] == ti)
 			coprocessor_flush(ti, i);
-		cpenable >>= 1;
 	}
 	xtensa_set_sr(old_cpenable, cpenable);
+}
 
-	preempt_enable();
+void coprocessor_flush_all(struct thread_info *ti)
+{
+	if (ti->cpenable) {
+		/* pairs with memw (2) in fast_coprocessor */
+		smp_rmb();
+		smp_call_function_single(ti->cp_owner_cpu,
+					 local_coprocessor_flush_all,
+					 ti, true);
+	}
+}
+
+static void local_coprocessor_flush_release_all(void *info)
+{
+	local_coprocessor_flush_all(info);
+	local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+	if (ti->cpenable) {
+		/* pairs with memw (2) in fast_coprocessor */
+		smp_rmb();
+		smp_call_function_single(ti->cp_owner_cpu,
+					 local_coprocessor_flush_release_all,
+					 ti, true);
+	}
 }
 
 #endif
@@ -140,8 +203,7 @@ void flush_thread(void)
 {
 #if XTENSA_HAVE_COPROCESSORS
 	struct thread_info *ti = current_thread_info();
-	coprocessor_flush_all(ti);
-	coprocessor_release_all(ti);
+	coprocessor_flush_release_all(ti);
 #endif
 	flush_ptrace_hw_breakpoint(current);
 }
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 323c678a691f..22cdaa6729d3 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -171,8 +171,7 @@ static int tie_set(struct task_struct *target,
 
 #if XTENSA_HAVE_COPROCESSORS
 	/* Flush all coprocessors before we overwrite them. */
-	coprocessor_flush_all(ti);
-	coprocessor_release_all(ti);
+	coprocessor_flush_release_all(ti);
 	ti->xtregs_cp.cp0 = newregs->cp0;
 	ti->xtregs_cp.cp1 = newregs->cp1;
 	ti->xtregs_cp.cp2 = newregs->cp2;
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 6f68649e86ba..c9ffd42db873 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -162,8 +162,7 @@ setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs)
 		return err;
 
 #if XTENSA_HAVE_COPROCESSORS
-	coprocessor_flush_all(ti);
-	coprocessor_release_all(ti);
+	coprocessor_flush_release_all(ti);
 	err |= __copy_to_user(&frame->xtregs.cp, &ti->xtregs_cp,
 			      sizeof (frame->xtregs.cp));
 #endif
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index ed7c188ebedc..e861ce76ad38 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <linux/thread_info.h>
 
 #include <asm/cacheflush.h>
+#include <asm/coprocessor.h>
 #include <asm/kdebug.h>
 #include <asm/mmu_context.h>
 #include <asm/mxregs.h>
@@ -273,6 +274,12 @@ int __cpu_disable(void)
 	 */
 	set_cpu_online(cpu, false);
 
+#if XTENSA_HAVE_COPROCESSORS
+	/*
+	 * Flush coprocessor contexts that are active on the current CPU.
+	 */
+	local_coprocessors_flush_release_all();
+#endif
 	/*
 	 * OK - migrate IRQs away from this CPU
 	 */
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 62c497605128..138a86fbe9d7 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -57,6 +57,9 @@ static void do_nmi(struct pt_regs *regs);
 static void do_unaligned_user(struct pt_regs *regs);
 #endif
 static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
 static void do_debug(struct pt_regs *regs);
 
 /*
@@ -69,7 +72,8 @@ static void do_debug(struct pt_regs *regs);
 #define USER		0x02
 
 #define COPROCESSOR(x)							\
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor },\
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
 
 typedef struct {
 	int cause;
@@ -327,6 +331,13 @@ static void do_unaligned_user(struct pt_regs *regs)
 }
 #endif
 
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+	coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
 /* Handle debug events.
  * When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
  * preemption disabled to avoid rescheduling and keep mapping of hardware
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-04-21 10:12 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
2022-04-21 10:10 ` [PATCH v2 02/10] xtensa: clean up exception handler prototypes Max Filippov
2022-04-21 10:10 ` [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h Max Filippov
2022-04-21 10:10 ` [PATCH v2 04/10] xtensa: clean up excsave1 initialization Max Filippov
2022-04-21 10:10 ` [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
2022-04-21 10:10 ` [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode Max Filippov
2022-04-21 10:10 ` [PATCH v2 07/10] xtensa: add xtensa_xsr macro Max Filippov
2022-04-21 10:10 ` [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB Max Filippov
2022-04-21 10:10 ` [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush Max Filippov
2022-04-21 10:10 ` [PATCH v2 10/10] xtensa: support coprocessors on SMP Max Filippov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.