linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/6] xtensa: support coprocessors on SMP
@ 2022-04-16  4:25 Max Filippov
  2022-04-16  4:25 ` [PATCH 1/6] xtensa: clean up function declarations in traps.c Max Filippov
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Hello,

this series makes a number of small cleanups around the coprocessor
handling code and adds coprocessor support for SMP configurations.

Max Filippov (6):
  xtensa: clean up function declarations in traps.c
  xtensa: clean up declarations in coprocessor.h
  xtensa: clean up excsave1 initialization
  xtensa: use callx0 opcode in fast_coprocessor
  xtensa: add xtensa_xsr macro
  xtensa: support coprocessors on SMP

 arch/xtensa/include/asm/coprocessor.h | 10 ++--
 arch/xtensa/include/asm/processor.h   |  7 +++
 arch/xtensa/include/asm/thread_info.h |  7 ++-
 arch/xtensa/include/asm/traps.h       | 11 +++-
 arch/xtensa/kernel/asm-offsets.c      |  8 ++-
 arch/xtensa/kernel/coprocessor.S      | 61 +++++++++++++--------
 arch/xtensa/kernel/entry.S            | 17 ++++++
 arch/xtensa/kernel/process.c          | 70 +++++++++++++++---------
 arch/xtensa/kernel/traps.c            | 77 +++++++++++++++------------
 9 files changed, 177 insertions(+), 91 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] xtensa: clean up function declarations in traps.c
  2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
@ 2022-04-16  4:25 ` Max Filippov
  2022-04-16  4:25 ` [PATCH 2/6] xtensa: clean up declarations in coprocessor.h Max Filippov
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Drop 'extern' from all function declarations. Add 'static' to
declarations and definitions that are only used locally. Add argument
names to declarations. Drop the second argument of do_multihit and
do_page_fault: it is unused and is not actually passed.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/traps.c | 61 +++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 515719c7e750..a85992d60c11 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -48,25 +48,31 @@
  * Machine specific interrupt handlers
  */
 
-extern void kernel_exception(void);
-extern void user_exception(void);
-
-extern void fast_illegal_instruction_user(void);
-extern void fast_syscall_user(void);
-extern void fast_alloca(void);
-extern void fast_unaligned(void);
-extern void fast_second_level_miss(void);
-extern void fast_store_prohibited(void);
-extern void fast_coprocessor(void);
-
-extern void do_illegal_instruction (struct pt_regs*);
-extern void do_interrupt (struct pt_regs*);
-extern void do_nmi(struct pt_regs *);
-extern void do_unaligned_user (struct pt_regs*);
-extern void do_multihit (struct pt_regs*, unsigned long);
-extern void do_page_fault (struct pt_regs*, unsigned long);
-extern void do_debug (struct pt_regs*);
-extern void system_call (struct pt_regs*);
+void kernel_exception(void);
+void user_exception(void);
+
+void fast_illegal_instruction_user(void);
+void fast_syscall_user(void);
+void fast_alloca(void);
+void fast_unaligned(void);
+void fast_second_level_miss(void);
+void fast_store_prohibited(void);
+void fast_coprocessor(void);
+
+void do_IRQ(int hwirq, struct pt_regs *regs);
+void do_page_fault(struct pt_regs *regs);
+void system_call(struct pt_regs *regs);
+
+static void do_illegal_instruction(struct pt_regs *regs);
+static void do_interrupt(struct pt_regs *regs);
+#if XTENSA_FAKE_NMI
+static void do_nmi(struct pt_regs *regs);
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+static void do_unaligned_user(struct pt_regs *regs);
+#endif
+static void do_multihit(struct pt_regs *regs);
+static void do_debug(struct pt_regs *regs);
 
 /*
  * The vector table must be preceded by a save area (which
@@ -197,7 +203,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
  * Multi-hit exception. This if fatal!
  */
 
-void do_multihit(struct pt_regs *regs, unsigned long exccause)
+static void do_multihit(struct pt_regs *regs)
 {
 	die("Caught multihit exception", regs, SIGKILL);
 }
@@ -206,8 +212,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
  * IRQ handler.
  */
 
-extern void do_IRQ(int, struct pt_regs *);
-
 #if XTENSA_FAKE_NMI
 
 #define IS_POW2(v) (((v) & ((v) - 1)) == 0)
@@ -240,7 +244,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
 
 DEFINE_PER_CPU(unsigned long, nmi_count);
 
-void do_nmi(struct pt_regs *regs)
+static void do_nmi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
@@ -253,7 +257,7 @@ void do_nmi(struct pt_regs *regs)
 }
 #endif
 
-void do_interrupt(struct pt_regs *regs)
+static void do_interrupt(struct pt_regs *regs)
 {
 	static const unsigned int_level_mask[] = {
 		0,
@@ -303,8 +307,7 @@ void do_interrupt(struct pt_regs *regs)
  * Illegal instruction. Fatal if in kernel space.
  */
 
-void
-do_illegal_instruction(struct pt_regs *regs)
+static void do_illegal_instruction(struct pt_regs *regs)
 {
 	__die_if_kernel("Illegal instruction in kernel", regs, SIGKILL);
 
@@ -324,8 +327,7 @@ do_illegal_instruction(struct pt_regs *regs)
  */
 
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-void
-do_unaligned_user (struct pt_regs *regs)
+static void do_unaligned_user(struct pt_regs *regs)
 {
 	__die_if_kernel("Unhandled unaligned exception in kernel",
 			regs, SIGKILL);
@@ -346,8 +348,7 @@ do_unaligned_user (struct pt_regs *regs)
  * breakpoint structures to debug registers intact, so that
  * DEBUGCAUSE.DBNUM could be used in case of data breakpoint hit.
  */
-void
-do_debug(struct pt_regs *regs)
+static void do_debug(struct pt_regs *regs)
 {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	int ret = check_hw_breakpoint(regs);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] xtensa: clean up declarations in coprocessor.h
  2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
  2022-04-16  4:25 ` [PATCH 1/6] xtensa: clean up function declarations in traps.c Max Filippov
@ 2022-04-16  4:25 ` Max Filippov
  2022-04-16  4:25 ` [PATCH 3/6] xtensa: clean up excsave1 initialization Max Filippov
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Drop 'extern' from all function declarations. Add argument names in
declarations.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/coprocessor.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index 0fbe2a740b8d..a360efced7e7 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -143,10 +143,9 @@ typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
 	__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
 
 extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
-extern void coprocessor_flush(struct thread_info*, int);
-
-extern void coprocessor_release_all(struct thread_info*);
-extern void coprocessor_flush_all(struct thread_info*);
+void coprocessor_flush(struct thread_info *ti, int cp_index);
+void coprocessor_release_all(struct thread_info *ti);
+void coprocessor_flush_all(struct thread_info *ti);
 
 #endif	/* XTENSA_HAVE_COPROCESSORS */
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] xtensa: clean up excsave1 initialization
  2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
  2022-04-16  4:25 ` [PATCH 1/6] xtensa: clean up function declarations in traps.c Max Filippov
  2022-04-16  4:25 ` [PATCH 2/6] xtensa: clean up declarations in coprocessor.h Max Filippov
@ 2022-04-16  4:25 ` Max Filippov
  2022-04-16  4:25 ` [PATCH 4/6] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Use xtensa_set_sr instead of inline assembly.
Rename the local variable exc_table in early_trap_init to avoid a
conflict with the per-CPU variable of the same name.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/traps.h | 4 ++--
 arch/xtensa/kernel/traps.c      | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 6fa47cd8e02d..c9c5f59db420 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -45,11 +45,11 @@ void fast_second_level_miss(void);
 /* Initialize minimal exc_table structure sufficient for basic paging */
 static inline void __init early_trap_init(void)
 {
-	static struct exc_table exc_table __initdata = {
+	static struct exc_table init_exc_table __initdata = {
 		.fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
 			fast_second_level_miss,
 	};
-	__asm__ __volatile__("wsr  %0, excsave1\n" : : "a" (&exc_table));
+	xtensa_set_sr(&init_exc_table, excsave1);
 }
 
 void secondary_trap_init(void);
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index a85992d60c11..f6855eb92614 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -386,8 +386,7 @@ void * __init trap_set_handler(int cause, void *handler)
 
 static void trap_init_excsave(void)
 {
-	unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table);
-	__asm__ __volatile__("wsr  %0, excsave1\n" : : "a" (excsave1));
+	xtensa_set_sr(this_cpu_ptr(&exc_table), excsave1);
 }
 
 static void trap_init_debug(void)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] xtensa: use callx0 opcode in fast_coprocessor
  2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
                   ` (2 preceding siblings ...)
  2022-04-16  4:25 ` [PATCH 3/6] xtensa: clean up excsave1 initialization Max Filippov
@ 2022-04-16  4:25 ` Max Filippov
  2022-04-16  4:25 ` [PATCH 5/6] xtensa: add xtensa_xsr macro Max Filippov
  2022-04-16  4:25 ` [PATCH 6/6] xtensa: support coprocessors on SMP Max Filippov
  5 siblings, 0 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

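Instead of emulating a call in fast_coprocessor by loading the return
address into a0 and jumping with 'jx', use the callx0 opcode directly.
Use 'ret' instead of 'jx a0' in the coprocessor save/load routines.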

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/coprocessor.S | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index c7b9f12896f2..8bcbabbff38a 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -30,7 +30,7 @@
 		.align 4;						\
 	.Lsave_cp_regs_cp##x:						\
 		xchal_cp##x##_store a2 a3 a4 a5 a6;			\
-		jx	a0;						\
+		ret;							\
 	.endif
 
 #define SAVE_CP_REGS_TAB(x)						\
@@ -47,7 +47,7 @@
 		.align 4;						\
 	.Lload_cp_regs_cp##x:						\
 		xchal_cp##x##_load a2 a3 a4 a5 a6;			\
-		jx	a0;						\
+		ret;							\
 	.endif
 
 #define LOAD_CP_REGS_TAB(x)						\
@@ -163,21 +163,20 @@ ENTRY(fast_coprocessor)
 	s32i	a5, a4, THREAD_CPENABLE
 
 	/*
-	 * Get context save area and 'call' save routine. 
+	 * Get context save area and call save routine.
 	 * (a4 still holds previous owner (thread_info), a3 CP number)
 	 */
 
 	movi	a5, .Lsave_cp_regs_jump_table
-	movi	a0, 2f			# a0: 'return' address
 	addx8	a3, a3, a5		# a3: coprocessor number
 	l32i	a2, a3, 4		# a2: xtregs offset
 	l32i	a3, a3, 0		# a3: jump address
 	add	a2, a2, a4
-	jx	a3
+	callx0	a3
 
 	/* Note that only a0 and a1 were preserved. */
 
-2:	rsr	a3, exccause
+	rsr	a3, exccause
 	addi	a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
 	movi	a0, coprocessor_owner
 	addx4	a0, a3, a0
@@ -187,19 +186,18 @@ ENTRY(fast_coprocessor)
 1:	GET_THREAD_INFO (a4, a1)
 	s32i	a4, a0, 0
 
-	/* Get context save area and 'call' load routine. */
+	/* Get context save area and call load routine. */
 
 	movi	a5, .Lload_cp_regs_jump_table
-	movi	a0, 1f
 	addx8	a3, a3, a5
 	l32i	a2, a3, 4		# a2: xtregs offset
 	l32i	a3, a3, 0		# a3: jump address
 	add	a2, a2, a4
-	jx	a3
+	callx0	a3
 
 	/* Restore all registers and return from exception handler. */
 
-1:	l32i	a6, a1, PT_AREG6
+	l32i	a6, a1, PT_AREG6
 	l32i	a5, a1, PT_AREG5
 	l32i	a4, a1, PT_AREG4
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] xtensa: add xtensa_xsr macro
  2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
                   ` (3 preceding siblings ...)
  2022-04-16  4:25 ` [PATCH 4/6] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
@ 2022-04-16  4:25 ` Max Filippov
  2022-04-16  4:25 ` [PATCH 6/6] xtensa: support coprocessors on SMP Max Filippov
  5 siblings, 0 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

xtensa_xsr executes the XSR instruction on the specified special
register: it writes the given value to the register and returns the
value the register held before.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/processor.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 4489a27d527a..76bc63127c66 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -246,6 +246,13 @@ extern unsigned long __get_wchan(struct task_struct *p);
 	 v; \
 	 })
 
+#define xtensa_xsr(x, sr) \
+	({ \
+	 unsigned int __v__ = (unsigned int)(x); \
+	 __asm__ __volatile__ ("xsr %0, " __stringify(sr) : "+a"(__v__)); \
+	 __v__; \
+	 })
+
 #if XCHAL_HAVE_EXTERN_REGS
 
 static inline void set_er(unsigned long value, unsigned long addr)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] xtensa: support coprocessors on SMP
  2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
                   ` (4 preceding siblings ...)
  2022-04-16  4:25 ` [PATCH 5/6] xtensa: add xtensa_xsr macro Max Filippov
@ 2022-04-16  4:25 ` Max Filippov
  5 siblings, 0 replies; 7+ messages in thread
From: Max Filippov @ 2022-04-16  4:25 UTC (permalink / raw)
  To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov

Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.

Make the coprocessor_owner array per-CPU and move it to struct exc_table
for easy access from the fast_coprocessor exception handler. Allow a task
to have live coprocessors on only a single CPU, and record this CPU number
in struct thread_info::cp_owner_cpu. Change the meaning of
struct thread_info::cpenable to 'coprocessors live on cp_owner_cpu'.
Introduce a C-level coprocessor exception handler that flushes and
releases the live coprocessors of the task taking the 'coprocessor
disabled' exception, and call it from the fast_coprocessor handler when
the task has live coprocessors on another CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending an IPI to cp_owner_cpu. Add
coprocessor_flush_release_all to do a flush followed by a release
efficiently.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/coprocessor.h |  3 +-
 arch/xtensa/include/asm/thread_info.h |  7 ++-
 arch/xtensa/include/asm/traps.h       |  7 +++
 arch/xtensa/kernel/asm-offsets.c      |  8 ++-
 arch/xtensa/kernel/coprocessor.S      | 43 +++++++++++-----
 arch/xtensa/kernel/entry.S            | 17 +++++++
 arch/xtensa/kernel/process.c          | 70 +++++++++++++++++----------
 arch/xtensa/kernel/traps.c            | 13 ++++-
 8 files changed, 125 insertions(+), 43 deletions(-)

diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index a360efced7e7..dc53bd015c5f 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -142,10 +142,11 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
 typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
 	__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
 
-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
+struct thread_info;
 void coprocessor_flush(struct thread_info *ti, int cp_index);
 void coprocessor_release_all(struct thread_info *ti);
 void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);
 
 #endif	/* XTENSA_HAVE_COPROCESSORS */
 
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index f6fcbba1d02f..52974317a6b6 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -52,12 +52,17 @@ struct thread_info {
 	__u32			cpu;		/* current CPU */
 	__s32			preempt_count;	/* 0 => preemptable,< 0 => BUG*/
 
-	unsigned long		cpenable;
 #if XCHAL_HAVE_EXCLUSIVE
 	/* result of the most recent exclusive store */
 	unsigned long		atomctl8;
 #endif
 
+	/*
+	 * If i-th bit is set then coprocessor state is loaded into the
+	 * coprocessor i on CPU cp_owner_cpu.
+	 */
+	unsigned long		cpenable;
+	u32			cp_owner_cpu;
 	/* Allocate storage for extra user states and coprocessor states. */
 #if XTENSA_HAVE_COPROCESSORS
 	xtregs_coprocessor_t	xtregs_cp;
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index c9c5f59db420..6b292facf7a7 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -12,6 +12,7 @@
 
 #include <asm/ptrace.h>
 
+struct thread_info;
 /*
  * Per-CPU exception handling data structure.
  * EXCSAVE1 points to it.
@@ -25,6 +26,10 @@ struct exc_table {
 	void *fixup;
 	/* For passing a parameter to fixup */
 	void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+	/* Pointers to owner struct thread_info */
+	struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
 	/* Fast user exception handlers */
 	void *fast_user_handler[EXCCAUSE_N];
 	/* Fast kernel exception handlers */
@@ -33,6 +38,8 @@ struct exc_table {
 	void *default_handler[EXCCAUSE_N];
 };
 
+DECLARE_PER_CPU(struct exc_table, exc_table);
+
 /*
  * handler must be either of the following:
  *  void (*)(struct pt_regs *regs);
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 37278e2785fb..e3b9cf4c2289 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -91,10 +91,12 @@ int main(void)
 	/* struct thread_info (offset from start_struct) */
 	DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
 	DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
-	DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
 #if XCHAL_HAVE_EXCLUSIVE
 	DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
 #endif
+	DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+	DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+	DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
 #if XTENSA_HAVE_COPROCESSORS
 	DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
 	DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,6 +139,10 @@ int main(void)
 	DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
 	DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
 	DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+	DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+	       offsetof(struct exc_table, coprocessor_owner));
+#endif
 	DEFINE(EXC_TABLE_FAST_USER,
 	       offsetof(struct exc_table, fast_user_handler));
 	DEFINE(EXC_TABLE_FAST_KERNEL,
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 8bcbabbff38a..1e2bfcf9f0cf 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -115,9 +115,32 @@
 
 ENTRY(fast_coprocessor)
 
+	s32i	a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+	/*
+	 * Check if any coprocessor context is live on another CPU
+	 * and if so go through the C-level coprocessor exception handler
+	 * to flush it to memory.
+	 */
+
+	GET_THREAD_INFO (a0, a2)
+	l32i	a3, a0, THREAD_CPENABLE
+	beqz	a3, .Lload_local
+	l32i	a3, a0, THREAD_CPU
+	l32i	a0, a0, THREAD_CP_OWNER_CPU
+	beq	a0, a3, .Lload_local
+
+	rsr	a0, ps
+	l32i	a3, a2, PT_AREG3
+	bbci.l	a0, PS_UM_BIT, 1f
+	call0	user_exception
+1:	call0	kernel_exception
+#endif
+
 	/* Save remaining registers a1-a3 and SAR */
 
-	s32i	a3, a2, PT_AREG3
+.Lload_local:
 	rsr	a3, sar
 	s32i	a1, a2, PT_AREG1
 	s32i	a3, a2, PT_SAR
@@ -150,9 +173,9 @@ ENTRY(fast_coprocessor)
 
 	/* Retrieve previous owner. (a3 still holds CP number) */
 
-	movi	a0, coprocessor_owner	# list of owners
+	rsr	a0, excsave1		# exc_table
 	addx4	a0, a3, a0		# entry for CP
-	l32i	a4, a0, 0
+	l32i	a4, a0, EXC_TABLE_COPROCESSOR_OWNER
 
 	beqz	a4, 1f			# skip 'save' if no previous owner
 
@@ -178,13 +201,15 @@ ENTRY(fast_coprocessor)
 
 	rsr	a3, exccause
 	addi	a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
-	movi	a0, coprocessor_owner
+	rsr	a0, excsave1		# exc_table
 	addx4	a0, a3, a0
 
 	/* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
 
 1:	GET_THREAD_INFO (a4, a1)
-	s32i	a4, a0, 0
+	l32i	a5, a4, THREAD_CPU
+	s32i	a4, a0, EXC_TABLE_COPROCESSOR_OWNER
+	s32i	a5, a4, THREAD_CP_OWNER_CPU
 
 	/* Get context save area and call load routine. */
 
@@ -245,12 +270,4 @@ ENTRY(coprocessor_flush)
 
 ENDPROC(coprocessor_flush)
 
-	.data
-
-ENTRY(coprocessor_owner)
-
-	.fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
 #endif /* XTENSA_HAVE_COPROCESSORS */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index b9bcb2cd74dd..033443b4ce87 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -2087,9 +2087,26 @@ ENTRY(_switch_to)
 	/* Switch CPENABLE */
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
+#ifdef CONFIG_SMP
 	l32i	a3, a5, THREAD_CPENABLE
+	beqz	a3, 1f
+	l32i	a6, a5, THREAD_CP_OWNER_CPU
+	l32i	a7, a5, THREAD_CPU
+	beq	a6, a7, 1f	# load 0 into CPENABLE if current CPU is not the owner
+	movi	a3, 0
+1:
 	xsr	a3, cpenable
+
+	l32i	a6, a4, THREAD_CP_OWNER_CPU
+	l32i	a7, a4, THREAD_CPU
+	bne	a6, a7, 1f	# skip saving CPENABLE if current CPU was not the owner
 	s32i	a3, a4, THREAD_CPENABLE
+1:
+#else
+	l32i	a3, a5, THREAD_CPENABLE
+	xsr	a3, cpenable
+	s32i	a3, a4, THREAD_CPENABLE
+#endif
 #endif
 
 #if XCHAL_HAVE_EXCLUSIVE
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index e8bfbca5f001..a17c37fbd93c 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/regs.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
 
 extern void ret_from_fork(void);
 extern void ret_from_kernel_thread(void);
@@ -63,52 +64,70 @@ EXPORT_SYMBOL(__stack_chk_guard);
 
 #if XTENSA_HAVE_COPROCESSORS
 
-void coprocessor_release_all(struct thread_info *ti)
+static void local_coprocessor_release_all(void *info)
 {
-	unsigned long cpenable;
+	struct thread_info *ti = info;
+	struct thread_info **coprocessor_owner;
 	int i;
 
-	/* Make sure we don't switch tasks during this operation. */
-
-	preempt_disable();
+	coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
 
 	/* Walk through all cp owners and release it for the requested one. */
 
-	cpenable = ti->cpenable;
-
 	for (i = 0; i < XCHAL_CP_MAX; i++) {
-		if (coprocessor_owner[i] == ti) {
-			coprocessor_owner[i] = 0;
-			cpenable &= ~(1 << i);
-		}
+		if (coprocessor_owner[i] == ti)
+			coprocessor_owner[i] = NULL;
 	}
-
-	ti->cpenable = cpenable;
+	ti->cpenable = 0;
 	if (ti == current_thread_info())
 		xtensa_set_sr(0, cpenable);
+}
 
-	preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+	if (ti->cpenable)
+		smp_call_function_single(ti->cp_owner_cpu,
+					 local_coprocessor_release_all,
+					 ti, true);
 }
 
-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
 {
-	unsigned long cpenable, old_cpenable;
+	struct thread_info *ti = info;
+	struct thread_info **coprocessor_owner;
+	unsigned long old_cpenable;
 	int i;
 
-	preempt_disable();
-
-	old_cpenable = xtensa_get_sr(cpenable);
-	cpenable = ti->cpenable;
-	xtensa_set_sr(cpenable, cpenable);
+	coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+	old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
 
 	for (i = 0; i < XCHAL_CP_MAX; i++) {
-		if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+		if (coprocessor_owner[i] == ti)
 			coprocessor_flush(ti, i);
-		cpenable >>= 1;
 	}
 	xtensa_set_sr(old_cpenable, cpenable);
+}
+
+void coprocessor_flush_all(struct thread_info *ti)
+{
+	if (ti->cpenable)
+		smp_call_function_single(ti->cp_owner_cpu,
+					 local_coprocessor_flush_all,
+					 ti, true);
+}
 
-	preempt_enable();
+static void local_coprocessor_flush_release_all(void *info)
+{
+	local_coprocessor_flush_all(info);
+	local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+	if (ti->cpenable)
+		smp_call_function_single(ti->cp_owner_cpu,
+					 local_coprocessor_flush_release_all,
+					 ti, true);
 }
 
 #endif
@@ -140,8 +159,7 @@ void flush_thread(void)
 {
 #if XTENSA_HAVE_COPROCESSORS
 	struct thread_info *ti = current_thread_info();
-	coprocessor_flush_all(ti);
-	coprocessor_release_all(ti);
+	coprocessor_flush_release_all(ti);
 #endif
 	flush_ptrace_hw_breakpoint(current);
 }
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index f6855eb92614..9b8a76d4fc05 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -72,6 +72,9 @@ static void do_nmi(struct pt_regs *regs);
 static void do_unaligned_user(struct pt_regs *regs);
 #endif
 static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
 static void do_debug(struct pt_regs *regs);
 
 /*
@@ -84,7 +87,8 @@ static void do_debug(struct pt_regs *regs);
 #define USER		0x02
 
 #define COPROCESSOR(x)							\
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor },	\
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
 
 typedef struct {
 	int cause;
@@ -342,6 +346,13 @@ static void do_unaligned_user(struct pt_regs *regs)
 }
 #endif
 
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+	coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
 /* Handle debug events.
  * When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
  * preemption disabled to avoid rescheduling and keep mapping of hardware
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-04-16  4:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-16  4:25 [PATCH 0/6] xtensa: support coprocessors on SMP Max Filippov
2022-04-16  4:25 ` [PATCH 1/6] xtensa: clean up function declarations in traps.c Max Filippov
2022-04-16  4:25 ` [PATCH 2/6] xtensa: clean up declarations in coprocessor.h Max Filippov
2022-04-16  4:25 ` [PATCH 3/6] xtensa: clean up excsave1 initialization Max Filippov
2022-04-16  4:25 ` [PATCH 4/6] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
2022-04-16  4:25 ` [PATCH 5/6] xtensa: add xtensa_xsr macro Max Filippov
2022-04-16  4:25 ` [PATCH 6/6] xtensa: support coprocessors on SMP Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).