* [PATCH v2 00/10] xtensa: support coprocessors on SMP
@ 2022-04-21 10:10 Max Filippov
2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
` (9 more replies)
0 siblings, 10 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Hello,
this series does a bunch of small cleanups around exception and
coprocessor handling code and adds coprocessors support in SMP
configurations.
Changes v1->v2:
- clean up exception handler prototypes
- merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB
- get rid of stack frame in coprocessor_flush
- document rules for coprocessor context management
- clean up context management from the LKMM point of view, introduce
and document barriers
- support CPU hotplug
Max Filippov (10):
xtensa: clean up function declarations in traps.c
xtensa: clean up exception handler prototypes
xtensa: clean up declarations in coprocessor.h
xtensa: clean up excsave1 initialization
xtensa: use callx0 opcode in fast_coprocessor
xtensa: handle coprocessor exceptions in kernel mode
xtensa: add xtensa_xsr macro
xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB
xtensa: get rid of stack frame in coprocessor_flush
xtensa: support coprocessors on SMP
arch/xtensa/include/asm/coprocessor.h | 11 +-
arch/xtensa/include/asm/processor.h | 7 +
arch/xtensa/include/asm/thread_info.h | 7 +-
arch/xtensa/include/asm/traps.h | 40 +++--
arch/xtensa/kernel/asm-offsets.c | 8 +-
arch/xtensa/kernel/coprocessor.S | 230 +++++++++++++++-----------
arch/xtensa/kernel/entry.S | 12 +-
arch/xtensa/kernel/process.c | 112 ++++++++++---
arch/xtensa/kernel/ptrace.c | 3 +-
arch/xtensa/kernel/s32c1i_selftest.c | 7 +-
arch/xtensa/kernel/signal.c | 3 +-
arch/xtensa/kernel/smp.c | 7 +
arch/xtensa/kernel/traps.c | 69 ++++----
13 files changed, 334 insertions(+), 182 deletions(-)
--
2.30.2
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 01/10] xtensa: clean up function declarations in traps.c
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 02/10] xtensa: clean up exception handler prototypes Max Filippov
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Drop 'extern' from all function declarations and move those that need to
be visible from traps.c to traps.h. Add 'asmlinkage' to declarations of
functions defined in assembly. Add 'static' to declarations and
definitions only used locally. Add argument names in declarations.
Drop unused second argument from do_multihit and do_page_fault.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- move declarations to traps.h and add 'asmlinkage' annotations
arch/xtensa/include/asm/traps.h | 18 +++++++++++--
arch/xtensa/kernel/traps.c | 46 ++++++++++++---------------------
2 files changed, 32 insertions(+), 32 deletions(-)
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 6fa47cd8e02d..fc63217232a4 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -39,8 +39,22 @@ struct exc_table {
* void (*)(struct pt_regs *regs, unsigned long exccause);
*/
extern void * __init trap_set_handler(int cause, void *handler);
-extern void do_unhandled(struct pt_regs *regs, unsigned long exccause);
-void fast_second_level_miss(void);
+
+asmlinkage void fast_illegal_instruction_user(void);
+asmlinkage void fast_syscall_user(void);
+asmlinkage void fast_alloca(void);
+asmlinkage void fast_unaligned(void);
+asmlinkage void fast_second_level_miss(void);
+asmlinkage void fast_store_prohibited(void);
+asmlinkage void fast_coprocessor(void);
+
+asmlinkage void kernel_exception(void);
+asmlinkage void user_exception(void);
+asmlinkage void system_call(struct pt_regs *regs);
+
+void do_IRQ(int hwirq, struct pt_regs *regs);
+void do_page_fault(struct pt_regs *regs);
+void do_unhandled(struct pt_regs *regs, unsigned long exccause);
/* Initialize minimal exc_table structure sufficient for basic paging */
static inline void __init early_trap_init(void)
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 515719c7e750..b6bb5911ec7f 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -48,25 +48,16 @@
* Machine specific interrupt handlers
*/
-extern void kernel_exception(void);
-extern void user_exception(void);
-
-extern void fast_illegal_instruction_user(void);
-extern void fast_syscall_user(void);
-extern void fast_alloca(void);
-extern void fast_unaligned(void);
-extern void fast_second_level_miss(void);
-extern void fast_store_prohibited(void);
-extern void fast_coprocessor(void);
-
-extern void do_illegal_instruction (struct pt_regs*);
-extern void do_interrupt (struct pt_regs*);
-extern void do_nmi(struct pt_regs *);
-extern void do_unaligned_user (struct pt_regs*);
-extern void do_multihit (struct pt_regs*, unsigned long);
-extern void do_page_fault (struct pt_regs*, unsigned long);
-extern void do_debug (struct pt_regs*);
-extern void system_call (struct pt_regs*);
+static void do_illegal_instruction(struct pt_regs *regs);
+static void do_interrupt(struct pt_regs *regs);
+#if XTENSA_FAKE_NMI
+static void do_nmi(struct pt_regs *regs);
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+static void do_unaligned_user(struct pt_regs *regs);
+#endif
+static void do_multihit(struct pt_regs *regs);
+static void do_debug(struct pt_regs *regs);
/*
* The vector table must be preceded by a save area (which
@@ -197,7 +188,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
* Multi-hit exception. This if fatal!
*/
-void do_multihit(struct pt_regs *regs, unsigned long exccause)
+static void do_multihit(struct pt_regs *regs)
{
die("Caught multihit exception", regs, SIGKILL);
}
@@ -206,8 +197,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
* IRQ handler.
*/
-extern void do_IRQ(int, struct pt_regs *);
-
#if XTENSA_FAKE_NMI
#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
@@ -240,7 +229,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
DEFINE_PER_CPU(unsigned long, nmi_count);
-void do_nmi(struct pt_regs *regs)
+static void do_nmi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -253,7 +242,7 @@ void do_nmi(struct pt_regs *regs)
}
#endif
-void do_interrupt(struct pt_regs *regs)
+static void do_interrupt(struct pt_regs *regs)
{
static const unsigned int_level_mask[] = {
0,
@@ -303,8 +292,7 @@ void do_interrupt(struct pt_regs *regs)
* Illegal instruction. Fatal if in kernel space.
*/
-void
-do_illegal_instruction(struct pt_regs *regs)
+static void do_illegal_instruction(struct pt_regs *regs)
{
__die_if_kernel("Illegal instruction in kernel", regs, SIGKILL);
@@ -324,8 +312,7 @@ do_illegal_instruction(struct pt_regs *regs)
*/
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-void
-do_unaligned_user (struct pt_regs *regs)
+static void do_unaligned_user(struct pt_regs *regs)
{
__die_if_kernel("Unhandled unaligned exception in kernel",
regs, SIGKILL);
@@ -346,8 +333,7 @@ do_unaligned_user (struct pt_regs *regs)
* breakpoint structures to debug registers intact, so that
* DEBUGCAUSE.DBNUM could be used in case of data breakpoint hit.
*/
-void
-do_debug(struct pt_regs *regs)
+static void do_debug(struct pt_regs *regs)
{
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int ret = check_hw_breakpoint(regs);
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 02/10] xtensa: clean up exception handler prototypes
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h Max Filippov
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Exception handlers are currently passed as void pointers because they
may have one or two parameters. Only two handlers use the second
parameter and it is available in the struct pt_regs anyway. Make all
handlers have only one parameter, introduce xtensa_exception_handler
type for handlers and use it in trap_set_handler.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- new patch
arch/xtensa/include/asm/traps.h | 14 ++++++--------
arch/xtensa/kernel/s32c1i_selftest.c | 7 +++----
arch/xtensa/kernel/traps.c | 7 ++++---
3 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index fc63217232a4..bfdb0af61b07 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -12,6 +12,8 @@
#include <asm/ptrace.h>
+typedef void xtensa_exception_handler(struct pt_regs *regs);
+
/*
* Per-CPU exception handling data structure.
* EXCSAVE1 points to it.
@@ -30,15 +32,11 @@ struct exc_table {
/* Fast kernel exception handlers */
void *fast_kernel_handler[EXCCAUSE_N];
/* Default C-Handlers */
- void *default_handler[EXCCAUSE_N];
+ xtensa_exception_handler *default_handler[EXCCAUSE_N];
};
-/*
- * handler must be either of the following:
- * void (*)(struct pt_regs *regs);
- * void (*)(struct pt_regs *regs, unsigned long exccause);
- */
-extern void * __init trap_set_handler(int cause, void *handler);
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler);
asmlinkage void fast_illegal_instruction_user(void);
asmlinkage void fast_syscall_user(void);
@@ -54,7 +52,7 @@ asmlinkage void system_call(struct pt_regs *regs);
void do_IRQ(int hwirq, struct pt_regs *regs);
void do_page_fault(struct pt_regs *regs);
-void do_unhandled(struct pt_regs *regs, unsigned long exccause);
+void do_unhandled(struct pt_regs *regs);
/* Initialize minimal exc_table structure sufficient for basic paging */
static inline void __init early_trap_init(void)
diff --git a/arch/xtensa/kernel/s32c1i_selftest.c b/arch/xtensa/kernel/s32c1i_selftest.c
index 07e56e3a9a8b..8362388c8719 100644
--- a/arch/xtensa/kernel/s32c1i_selftest.c
+++ b/arch/xtensa/kernel/s32c1i_selftest.c
@@ -40,14 +40,13 @@ static inline int probed_compare_swap(int *v, int cmp, int set)
/* Handle probed exception */
-static void __init do_probed_exception(struct pt_regs *regs,
- unsigned long exccause)
+static void __init do_probed_exception(struct pt_regs *regs)
{
if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */
regs->pc += 3; /* skip the s32c1i instruction */
- rcw_exc = exccause;
+ rcw_exc = regs->exccause;
} else {
- do_unhandled(regs, exccause);
+ do_unhandled(regs);
}
}
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index b6bb5911ec7f..d6b1a0c3e319 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -170,7 +170,7 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err)
* Unhandled Exceptions. Kill user task or panic if in kernel space.
*/
-void do_unhandled(struct pt_regs *regs, unsigned long exccause)
+void do_unhandled(struct pt_regs *regs)
{
__die_if_kernel("Caught unhandled exception - should not happen",
regs, SIGKILL);
@@ -180,7 +180,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
"(pid = %d, pc = %#010lx) - should not happen\n"
"\tEXCCAUSE is %ld\n",
current->comm, task_pid_nr(current), regs->pc,
- exccause);
+ regs->exccause);
force_sig(SIGILL);
}
@@ -360,7 +360,8 @@ static void do_debug(struct pt_regs *regs)
/* Set exception C handler - for temporary use when probing exceptions */
-void * __init trap_set_handler(int cause, void *handler)
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler)
{
void *previous = per_cpu(exc_table, 0).default_handler[cause];
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
2022-04-21 10:10 ` [PATCH v2 02/10] xtensa: clean up exception handler prototypes Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 04/10] xtensa: clean up excsave1 initialization Max Filippov
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Drop 'extern' from all function declarations. Add parameter names in
declarations.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
arch/xtensa/include/asm/coprocessor.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index 0fbe2a740b8d..a360efced7e7 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -143,10 +143,9 @@ typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
-extern void coprocessor_flush(struct thread_info*, int);
-
-extern void coprocessor_release_all(struct thread_info*);
-extern void coprocessor_flush_all(struct thread_info*);
+void coprocessor_flush(struct thread_info *ti, int cp_index);
+void coprocessor_release_all(struct thread_info *ti);
+void coprocessor_flush_all(struct thread_info *ti);
#endif /* XTENSA_HAVE_COPROCESSORS */
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 04/10] xtensa: clean up excsave1 initialization
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (2 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Use xtensa_set_sr instead of inline assembly.
Rename local variable exc_table in early_trap_init to avoid conflict
with per-CPU variable of the same name.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
arch/xtensa/include/asm/traps.h | 4 ++--
arch/xtensa/kernel/traps.c | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index bfdb0af61b07..514376eff58c 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -57,11 +57,11 @@ void do_unhandled(struct pt_regs *regs);
/* Initialize minimal exc_table structure sufficient for basic paging */
static inline void __init early_trap_init(void)
{
- static struct exc_table exc_table __initdata = {
+ static struct exc_table init_exc_table __initdata = {
.fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
fast_second_level_miss,
};
- __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (&exc_table));
+ xtensa_set_sr(&init_exc_table, excsave1);
}
void secondary_trap_init(void);
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index d6b1a0c3e319..95903f25e523 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -372,8 +372,7 @@ __init trap_set_handler(int cause, xtensa_exception_handler *handler)
static void trap_init_excsave(void)
{
- unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table);
- __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (excsave1));
+ xtensa_set_sr(this_cpu_ptr(&exc_table), excsave1);
}
static void trap_init_debug(void)
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (3 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 04/10] xtensa: clean up excsave1 initialization Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode Max Filippov
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Instead of emulating call0 in fast_coprocessor use that opcode directly.
Use 'ret' instead of 'jx a0'.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
arch/xtensa/kernel/coprocessor.S | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index c7b9f12896f2..8bcbabbff38a 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -30,7 +30,7 @@
.align 4; \
.Lsave_cp_regs_cp##x: \
xchal_cp##x##_store a2 a3 a4 a5 a6; \
- jx a0; \
+ ret; \
.endif
#define SAVE_CP_REGS_TAB(x) \
@@ -47,7 +47,7 @@
.align 4; \
.Lload_cp_regs_cp##x: \
xchal_cp##x##_load a2 a3 a4 a5 a6; \
- jx a0; \
+ ret; \
.endif
#define LOAD_CP_REGS_TAB(x) \
@@ -163,21 +163,20 @@ ENTRY(fast_coprocessor)
s32i a5, a4, THREAD_CPENABLE
/*
- * Get context save area and 'call' save routine.
+ * Get context save area and call save routine.
* (a4 still holds previous owner (thread_info), a3 CP number)
*/
movi a5, .Lsave_cp_regs_jump_table
- movi a0, 2f # a0: 'return' address
addx8 a3, a3, a5 # a3: coprocessor number
l32i a2, a3, 4 # a2: xtregs offset
l32i a3, a3, 0 # a3: jump address
add a2, a2, a4
- jx a3
+ callx0 a3
/* Note that only a0 and a1 were preserved. */
-2: rsr a3, exccause
+ rsr a3, exccause
addi a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
movi a0, coprocessor_owner
addx4 a0, a3, a0
@@ -187,19 +186,18 @@ ENTRY(fast_coprocessor)
1: GET_THREAD_INFO (a4, a1)
s32i a4, a0, 0
- /* Get context save area and 'call' load routine. */
+ /* Get context save area and call load routine. */
movi a5, .Lload_cp_regs_jump_table
- movi a0, 1f
addx8 a3, a3, a5
l32i a2, a3, 4 # a2: xtregs offset
l32i a3, a3, 0 # a3: jump address
add a2, a2, a4
- jx a3
+ callx0 a3
/* Restore all registers and return from exception handler. */
-1: l32i a6, a1, PT_AREG6
+ l32i a6, a1, PT_AREG6
l32i a5, a1, PT_AREG5
l32i a4, a1, PT_AREG4
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (4 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 07/10] xtensa: add xtensa_xsr macro Max Filippov
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
In order to let drivers use xtensa coprocessors on behalf of the calling
process the kernel must handle coprocessor exceptions from the kernel
mode the same way as from the user mode.
This is not sufficient to allow using coprocessors transparently in IRQ
or softirq context. Should such users exist they must be aware of the
context and do the right thing, e.g. preserve the coprocessor state and
restore it after use.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
arch/xtensa/kernel/traps.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 95903f25e523..62c497605128 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -69,7 +69,7 @@ static void do_debug(struct pt_regs *regs);
#define USER 0x02
#define COPROCESSOR(x) \
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor }
typedef struct {
int cause;
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 07/10] xtensa: add xtensa_xsr macro
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (5 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB Max Filippov
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
xtensa_xsr does the XSR instruction for the specified special register.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
arch/xtensa/include/asm/processor.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 4489a27d527a..76bc63127c66 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -246,6 +246,13 @@ extern unsigned long __get_wchan(struct task_struct *p);
v; \
})
+#define xtensa_xsr(x, sr) \
+ ({ \
+ unsigned int __v__ = (unsigned int)(x); \
+ __asm__ __volatile__ ("xsr %0, " __stringify(sr) : "+a"(__v__)); \
+ __v__; \
+ })
+
#if XCHAL_HAVE_EXTERN_REGS
static inline void set_er(unsigned long value, unsigned long addr)
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (6 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 07/10] xtensa: add xtensa_xsr macro Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush Max Filippov
2022-04-21 10:10 ` [PATCH v2 10/10] xtensa: support coprocessors on SMP Max Filippov
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Both tables share the same offset field but have different function
pointers. Merge them into a single table with 3-element entries to
reduce code and data duplication.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- new patch
arch/xtensa/kernel/coprocessor.S | 85 ++++++++++++++------------------
1 file changed, 37 insertions(+), 48 deletions(-)
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 8bcbabbff38a..af11ddaa8c5f 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -33,15 +33,6 @@
ret; \
.endif
-#define SAVE_CP_REGS_TAB(x) \
- .if XTENSA_HAVE_COPROCESSOR(x); \
- .long .Lsave_cp_regs_cp##x; \
- .else; \
- .long 0; \
- .endif; \
- .long THREAD_XTREGS_CP##x
-
-
#define LOAD_CP_REGS(x) \
.if XTENSA_HAVE_COPROCESSOR(x); \
.align 4; \
@@ -50,14 +41,19 @@
ret; \
.endif
-#define LOAD_CP_REGS_TAB(x) \
+#define CP_REGS_TAB(x) \
.if XTENSA_HAVE_COPROCESSOR(x); \
+ .long .Lsave_cp_regs_cp##x; \
.long .Lload_cp_regs_cp##x; \
.else; \
- .long 0; \
+ .long 0, 0; \
.endif; \
.long THREAD_XTREGS_CP##x
+#define CP_REGS_TAB_SAVE 0
+#define CP_REGS_TAB_LOAD 4
+#define CP_REGS_TAB_OFFSET 8
+
__XTENSA_HANDLER
SAVE_CP_REGS(0)
@@ -79,25 +75,15 @@
LOAD_CP_REGS(7)
.align 4
-.Lsave_cp_regs_jump_table:
- SAVE_CP_REGS_TAB(0)
- SAVE_CP_REGS_TAB(1)
- SAVE_CP_REGS_TAB(2)
- SAVE_CP_REGS_TAB(3)
- SAVE_CP_REGS_TAB(4)
- SAVE_CP_REGS_TAB(5)
- SAVE_CP_REGS_TAB(6)
- SAVE_CP_REGS_TAB(7)
-
-.Lload_cp_regs_jump_table:
- LOAD_CP_REGS_TAB(0)
- LOAD_CP_REGS_TAB(1)
- LOAD_CP_REGS_TAB(2)
- LOAD_CP_REGS_TAB(3)
- LOAD_CP_REGS_TAB(4)
- LOAD_CP_REGS_TAB(5)
- LOAD_CP_REGS_TAB(6)
- LOAD_CP_REGS_TAB(7)
+.Lcp_regs_jump_table:
+ CP_REGS_TAB(0)
+ CP_REGS_TAB(1)
+ CP_REGS_TAB(2)
+ CP_REGS_TAB(3)
+ CP_REGS_TAB(4)
+ CP_REGS_TAB(5)
+ CP_REGS_TAB(6)
+ CP_REGS_TAB(7)
/*
* Entry condition:
@@ -125,13 +111,12 @@ ENTRY(fast_coprocessor)
rsr a2, depc
s32i a2, a1, PT_AREG2
- /*
- * The hal macros require up to 4 temporary registers. We use a3..a6.
- */
+ /* The hal macros require up to 4 temporary registers. We use a3..a6. */
s32i a4, a1, PT_AREG4
s32i a5, a1, PT_AREG5
s32i a6, a1, PT_AREG6
+ s32i a7, a1, PT_AREG7
/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
@@ -148,6 +133,12 @@ ENTRY(fast_coprocessor)
wsr a0, cpenable
rsync
+ /* Get coprocessor save/load table entry (a7). */
+
+ movi a7, .Lcp_regs_jump_table
+ addx8 a7, a3, a7
+ addx4 a7, a3, a7
+
/* Retrieve previous owner. (a3 still holds CP number) */
movi a0, coprocessor_owner # list of owners
@@ -167,10 +158,8 @@ ENTRY(fast_coprocessor)
* (a4 still holds previous owner (thread_info), a3 CP number)
*/
- movi a5, .Lsave_cp_regs_jump_table
- addx8 a3, a3, a5 # a3: coprocessor number
- l32i a2, a3, 4 # a2: xtregs offset
- l32i a3, a3, 0 # a3: jump address
+ l32i a2, a7, CP_REGS_TAB_OFFSET
+ l32i a3, a7, CP_REGS_TAB_SAVE
add a2, a2, a4
callx0 a3
@@ -188,15 +177,14 @@ ENTRY(fast_coprocessor)
/* Get context save area and call load routine. */
- movi a5, .Lload_cp_regs_jump_table
- addx8 a3, a3, a5
- l32i a2, a3, 4 # a2: xtregs offset
- l32i a3, a3, 0 # a3: jump address
+ l32i a2, a7, CP_REGS_TAB_OFFSET
+ l32i a3, a7, CP_REGS_TAB_LOAD
add a2, a2, a4
callx0 a3
/* Restore all registers and return from exception handler. */
+ l32i a7, a1, PT_AREG7
l32i a6, a1, PT_AREG6
l32i a5, a1, PT_AREG5
l32i a4, a1, PT_AREG4
@@ -232,13 +220,14 @@ ENTRY(coprocessor_flush)
abi_entry(4)
s32i a0, a1, 0
- movi a0, .Lsave_cp_regs_jump_table
- addx8 a3, a3, a0
- l32i a4, a3, 4
- l32i a3, a3, 0
- add a2, a2, a4
- beqz a3, 1f
- callx0 a3
+ movi a4, .Lcp_regs_jump_table
+ addx8 a4, a3, a4
+ addx4 a3, a3, a4
+ l32i a4, a3, CP_REGS_TAB_SAVE
+ beqz a4, 1f
+ l32i a3, a3, CP_REGS_TAB_OFFSET
+ add a2, a2, a3
+ callx0 a4
1: l32i a0, a1, 0
abi_ret(4)
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (7 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
2022-04-21 10:10 ` [PATCH v2 10/10] xtensa: support coprocessors on SMP Max Filippov
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
coprocessor_flush is an ordinary function, so it can use all registers.
Don't reserve a stack frame for it; use a7 to preserve a0 around the
context saving call.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- new patch
arch/xtensa/kernel/coprocessor.S | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index af11ddaa8c5f..95412409c49e 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -216,10 +216,8 @@ ENDPROC(fast_coprocessor)
ENTRY(coprocessor_flush)
- /* reserve 4 bytes on stack to save a0 */
- abi_entry(4)
+ abi_entry_default
- s32i a0, a1, 0
movi a4, .Lcp_regs_jump_table
addx8 a4, a3, a4
addx4 a3, a3, a4
@@ -227,10 +225,11 @@ ENTRY(coprocessor_flush)
beqz a4, 1f
l32i a3, a3, CP_REGS_TAB_OFFSET
add a2, a2, a3
+ mov a7, a0
callx0 a4
-1: l32i a0, a1, 0
-
- abi_ret(4)
+ mov a0, a7
+1:
+ abi_ret_default
ENDPROC(coprocessor_flush)
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 10/10] xtensa: support coprocessors on SMP
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
` (8 preceding siblings ...)
2022-04-21 10:10 ` [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush Max Filippov
@ 2022-04-21 10:10 ` Max Filippov
9 siblings, 0 replies; 11+ messages in thread
From: Max Filippov @ 2022-04-21 10:10 UTC (permalink / raw)
To: linux-xtensa; +Cc: Chris Zankel, linux-kernel, Max Filippov
Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.
Make the coprocessor_owner array per-CPU and move it to struct exc_table
for easy access from the fast_coprocessor exception handler. Allow a task
to have live coprocessors only on a single CPU and record this CPU number
in struct thread_info::cp_owner_cpu. Change the meaning of
struct thread_info::cpenable to be 'coprocessors live on cp_owner_cpu'.
Introduce a C-level coprocessor exception handler that flushes and
releases live coprocessors of the task taking the 'coprocessor disabled'
exception, and call it from the fast_coprocessor handler when the task
has live coprocessors on another CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending IPI to the cp_owner_cpu. Add
function coprocessor_flush_release_all to do flush followed by release
atomically. Add function local_coprocessors_flush_release_all to flush
and release all coprocessors on the local CPU and use it to flush
coprocessor contexts from the CPU that goes offline.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- document rules for coprocessor context management
- clean up context management from the LKMM point of view, introduce
and document barriers
- support CPU hotplug
arch/xtensa/include/asm/coprocessor.h | 4 +-
arch/xtensa/include/asm/thread_info.h | 7 +-
arch/xtensa/include/asm/traps.h | 6 ++
arch/xtensa/kernel/asm-offsets.c | 8 +-
arch/xtensa/kernel/coprocessor.S | 122 +++++++++++++++++++-------
arch/xtensa/kernel/entry.S | 12 ++-
arch/xtensa/kernel/process.c | 112 +++++++++++++++++------
arch/xtensa/kernel/ptrace.c | 3 +-
arch/xtensa/kernel/signal.c | 3 +-
arch/xtensa/kernel/smp.c | 7 ++
arch/xtensa/kernel/traps.c | 13 ++-
11 files changed, 230 insertions(+), 67 deletions(-)
diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index a360efced7e7..3b1a0d5d2169 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -142,10 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
+struct thread_info;
void coprocessor_flush(struct thread_info *ti, int cp_index);
void coprocessor_release_all(struct thread_info *ti);
void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);
+void local_coprocessors_flush_release_all(void);
#endif /* XTENSA_HAVE_COPROCESSORS */
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index f6fcbba1d02f..52974317a6b6 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -52,12 +52,17 @@ struct thread_info {
__u32 cpu; /* current CPU */
__s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/
- unsigned long cpenable;
#if XCHAL_HAVE_EXCLUSIVE
/* result of the most recent exclusive store */
unsigned long atomctl8;
#endif
+ /*
+ * If i-th bit is set then coprocessor state is loaded into the
+ * coprocessor i on CPU cp_owner_cpu.
+ */
+ unsigned long cpenable;
+ u32 cp_owner_cpu;
/* Allocate storage for extra user states and coprocessor states. */
#if XTENSA_HAVE_COPROCESSORS
xtregs_coprocessor_t xtregs_cp;
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 514376eff58c..6f74ccc0c7ea 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -27,6 +27,10 @@ struct exc_table {
void *fixup;
/* For passing a parameter to fixup */
void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+ /* Pointers to owner struct thread_info */
+ struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
/* Fast user exception handlers */
void *fast_user_handler[EXCCAUSE_N];
/* Fast kernel exception handlers */
@@ -35,6 +39,8 @@ struct exc_table {
xtensa_exception_handler *default_handler[EXCCAUSE_N];
};
+DECLARE_PER_CPU(struct exc_table, exc_table);
+
xtensa_exception_handler *
__init trap_set_handler(int cause, xtensa_exception_handler *handler);
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 37278e2785fb..e3b9cf4c2289 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -91,10 +91,12 @@ int main(void)
/* struct thread_info (offset from start_struct) */
DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
- DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XCHAL_HAVE_EXCLUSIVE
DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
#endif
+ DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+ DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,6 +139,10 @@ int main(void)
DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+ DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+ offsetof(struct exc_table, coprocessor_owner));
+#endif
DEFINE(EXC_TABLE_FAST_USER,
offsetof(struct exc_table, fast_user_handler));
DEFINE(EXC_TABLE_FAST_KERNEL,
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 95412409c49e..ef33e76e07d8 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -19,6 +19,26 @@
#include <asm/current.h>
#include <asm/regs.h>
+/*
+ * Rules for coprocessor state manipulation on SMP:
+ *
+ * - a task may have live coprocessors only on one CPU.
+ *
+ * - whether coprocessor context of task T is live on some CPU is
+ * denoted by T's thread_info->cpenable.
+ *
+ * - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
+ * is valid in T's thread_info. Zero thread_info->cpenable means that
+ * coprocessor context is valid in T's thread_info.
+ *
+ * - if a coprocessor context of task T is live on CPU X, only CPU X changes
+ * T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
+ * This is done by making sure that for the task T with live coprocessor
+ * on CPU X cpenable SR is 0 when T runs on any other CPU Y.
+ * When fast_coprocessor exception is taken on CPU Y it goes to the
+ * C-level do_coprocessor that uses IPI to make CPU X flush T's coprocessors.
+ */
+
#if XTENSA_HAVE_COPROCESSORS
/*
@@ -101,9 +121,37 @@
ENTRY(fast_coprocessor)
+ s32i a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+ /*
+ * Check if any coprocessor context is live on another CPU
+ * and if so go through the C-level coprocessor exception handler
+ * to flush it to memory.
+ */
+ GET_THREAD_INFO (a0, a2)
+ l32i a3, a0, THREAD_CPENABLE
+ beqz a3, .Lload_local
+
+ /*
+ * Pairs with smp_wmb in local_coprocessor_release_all
+ * and with both memws below.
+ */
+ memw # (1)
+ l32i a3, a0, THREAD_CPU
+ l32i a0, a0, THREAD_CP_OWNER_CPU
+ beq a0, a3, .Lload_local
+
+ rsr a0, ps
+ l32i a3, a2, PT_AREG3
+ bbci.l a0, PS_UM_BIT, 1f
+ call0 user_exception
+1: call0 kernel_exception
+#endif
+
/* Save remaining registers a1-a3 and SAR */
- s32i a3, a2, PT_AREG3
+.Lload_local:
rsr a3, sar
s32i a1, a2, PT_AREG1
s32i a3, a2, PT_SAR
@@ -117,6 +165,9 @@ ENTRY(fast_coprocessor)
s32i a5, a1, PT_AREG5
s32i a6, a1, PT_AREG6
s32i a7, a1, PT_AREG7
+ s32i a8, a1, PT_AREG8
+ s32i a9, a1, PT_AREG9
+ s32i a10, a1, PT_AREG10
/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
@@ -139,51 +190,66 @@ ENTRY(fast_coprocessor)
addx8 a7, a3, a7
addx4 a7, a3, a7
- /* Retrieve previous owner. (a3 still holds CP number) */
+ /* Retrieve previous owner (a8). */
- movi a0, coprocessor_owner # list of owners
+ rsr a0, excsave1 # exc_table
addx4 a0, a3, a0 # entry for CP
- l32i a4, a0, 0
+ l32i a8, a0, EXC_TABLE_COPROCESSOR_OWNER
- beqz a4, 1f # skip 'save' if no previous owner
+ /* Set new owner (a9). */
- /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
+ GET_THREAD_INFO (a9, a1)
+ l32i a4, a9, THREAD_CPU
+ s32i a9, a0, EXC_TABLE_COPROCESSOR_OWNER
+ s32i a4, a9, THREAD_CP_OWNER_CPU
- l32i a5, a4, THREAD_CPENABLE
- xor a5, a5, a2 # (1 << cp-id) still in a2
- s32i a5, a4, THREAD_CPENABLE
+ /*
+ * Enable coprocessor for the new owner. (a2 = 1 << CP number)
+ * This can be done before loading context into the coprocessor.
+ */
+ l32i a4, a9, THREAD_CPENABLE
+ or a4, a4, a2
/*
- * Get context save area and call save routine.
- * (a4 still holds previous owner (thread_info), a3 CP number)
+ * Make sure THREAD_CP_OWNER_CPU is in memory before updating
+ * THREAD_CPENABLE
*/
+ memw # (2)
+ s32i a4, a9, THREAD_CPENABLE
- l32i a2, a7, CP_REGS_TAB_OFFSET
- l32i a3, a7, CP_REGS_TAB_SAVE
- add a2, a2, a4
- callx0 a3
+ beqz a8, 1f # skip 'save' if no previous owner
- /* Note that only a0 and a1 were preserved. */
+ /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
- rsr a3, exccause
- addi a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
- movi a0, coprocessor_owner
- addx4 a0, a3, a0
+ l32i a10, a8, THREAD_CPENABLE
+ xor a10, a10, a2
- /* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
+ /* Get context save area and call save routine. */
-1: GET_THREAD_INFO (a4, a1)
- s32i a4, a0, 0
+ l32i a2, a7, CP_REGS_TAB_OFFSET
+ l32i a3, a7, CP_REGS_TAB_SAVE
+ add a2, a2, a8
+ callx0 a3
+ /*
+ * Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
+ * before updating THREAD_CPENABLE
+ */
+ memw # (3)
+ s32i a10, a8, THREAD_CPENABLE
+1:
/* Get context save area and call load routine. */
l32i a2, a7, CP_REGS_TAB_OFFSET
l32i a3, a7, CP_REGS_TAB_LOAD
- add a2, a2, a4
+ add a2, a2, a9
callx0 a3
/* Restore all registers and return from exception handler. */
+ l32i a10, a1, PT_AREG10
+ l32i a9, a1, PT_AREG9
+ l32i a8, a1, PT_AREG8
l32i a7, a1, PT_AREG7
l32i a6, a1, PT_AREG6
l32i a5, a1, PT_AREG5
@@ -233,12 +299,4 @@ ENTRY(coprocessor_flush)
ENDPROC(coprocessor_flush)
- .data
-
-ENTRY(coprocessor_owner)
-
- .fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
#endif /* XTENSA_HAVE_COPROCESSORS */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index f2c789a5a92a..3255d4f61844 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -2071,8 +2071,16 @@ ENTRY(_switch_to)
#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
l32i a3, a5, THREAD_CPENABLE
- xsr a3, cpenable
- s32i a3, a4, THREAD_CPENABLE
+#ifdef CONFIG_SMP
+ beqz a3, 1f
+ memw # pairs with memw (2) in fast_coprocessor
+ l32i a6, a5, THREAD_CP_OWNER_CPU
+ l32i a7, a5, THREAD_CPU
+ beq a6, a7, 1f # load 0 into CPENABLE if current CPU is not the owner
+ movi a3, 0
+1:
+#endif
+ wsr a3, cpenable
#endif
#if XCHAL_HAVE_EXCLUSIVE
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index e8bfbca5f001..7e38292dd07a 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
@@ -63,52 +64,114 @@ EXPORT_SYMBOL(__stack_chk_guard);
#if XTENSA_HAVE_COPROCESSORS
-void coprocessor_release_all(struct thread_info *ti)
+void local_coprocessors_flush_release_all(void)
{
- unsigned long cpenable;
- int i;
+ struct thread_info **coprocessor_owner;
+ struct thread_info *unique_owner[XCHAL_CP_MAX];
+ int n = 0;
+ int i, j;
- /* Make sure we don't switch tasks during this operation. */
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+ xtensa_set_sr(XCHAL_CP_MASK, cpenable);
- preempt_disable();
+ for (i = 0; i < XCHAL_CP_MAX; i++) {
+ struct thread_info *ti = coprocessor_owner[i];
- /* Walk through all cp owners and release it for the requested one. */
+ if (ti) {
+ coprocessor_flush(ti, i);
- cpenable = ti->cpenable;
+ for (j = 0; j < n; j++)
+ if (unique_owner[j] == ti)
+ break;
+ if (j == n)
+ unique_owner[n++] = ti;
- for (i = 0; i < XCHAL_CP_MAX; i++) {
- if (coprocessor_owner[i] == ti) {
- coprocessor_owner[i] = 0;
- cpenable &= ~(1 << i);
+ coprocessor_owner[i] = NULL;
}
}
+ for (i = 0; i < n; i++) {
+ /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+ smp_wmb();
+ unique_owner[i]->cpenable = 0;
+ }
+ xtensa_set_sr(0, cpenable);
+}
- ti->cpenable = cpenable;
+static void local_coprocessor_release_all(void *info)
+{
+ struct thread_info *ti = info;
+ struct thread_info **coprocessor_owner;
+ int i;
+
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+
+ /* Walk through all cp owners and release it for the requested one. */
+
+ for (i = 0; i < XCHAL_CP_MAX; i++) {
+ if (coprocessor_owner[i] == ti)
+ coprocessor_owner[i] = NULL;
+ }
+ /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+ smp_wmb();
+ ti->cpenable = 0;
if (ti == current_thread_info())
xtensa_set_sr(0, cpenable);
+}
- preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+ if (ti->cpenable) {
+ /* pairs with memw (2) in fast_coprocessor */
+ smp_rmb();
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_release_all,
+ ti, true);
+ }
}
-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
{
- unsigned long cpenable, old_cpenable;
+ struct thread_info *ti = info;
+ struct thread_info **coprocessor_owner;
+ unsigned long old_cpenable;
int i;
- preempt_disable();
-
- old_cpenable = xtensa_get_sr(cpenable);
- cpenable = ti->cpenable;
- xtensa_set_sr(cpenable, cpenable);
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+ old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
- if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+ if (coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
- cpenable >>= 1;
}
xtensa_set_sr(old_cpenable, cpenable);
+}
- preempt_enable();
+void coprocessor_flush_all(struct thread_info *ti)
+{
+ if (ti->cpenable) {
+ /* pairs with memw (2) in fast_coprocessor */
+ smp_rmb();
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_flush_all,
+ ti, true);
+ }
+}
+
+static void local_coprocessor_flush_release_all(void *info)
+{
+ local_coprocessor_flush_all(info);
+ local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+ if (ti->cpenable) {
+ /* pairs with memw (2) in fast_coprocessor */
+ smp_rmb();
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_flush_release_all,
+ ti, true);
+ }
}
#endif
@@ -140,8 +203,7 @@ void flush_thread(void)
{
#if XTENSA_HAVE_COPROCESSORS
struct thread_info *ti = current_thread_info();
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
#endif
flush_ptrace_hw_breakpoint(current);
}
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 323c678a691f..22cdaa6729d3 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -171,8 +171,7 @@ static int tie_set(struct task_struct *target,
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessors before we overwrite them. */
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
ti->xtregs_cp.cp0 = newregs->cp0;
ti->xtregs_cp.cp1 = newregs->cp1;
ti->xtregs_cp.cp2 = newregs->cp2;
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 6f68649e86ba..c9ffd42db873 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -162,8 +162,7 @@ setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs)
return err;
#if XTENSA_HAVE_COPROCESSORS
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
err |= __copy_to_user(&frame->xtregs.cp, &ti->xtregs_cp,
sizeof (frame->xtregs.cp));
#endif
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index ed7c188ebedc..e861ce76ad38 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -30,6 +30,7 @@
#include <linux/thread_info.h>
#include <asm/cacheflush.h>
+#include <asm/coprocessor.h>
#include <asm/kdebug.h>
#include <asm/mmu_context.h>
#include <asm/mxregs.h>
@@ -273,6 +274,12 @@ int __cpu_disable(void)
*/
set_cpu_online(cpu, false);
+#if XTENSA_HAVE_COPROCESSORS
+ /*
+ * Flush coprocessor contexts that are active on the current CPU.
+ */
+ local_coprocessors_flush_release_all();
+#endif
/*
* OK - migrate IRQs away from this CPU
*/
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 62c497605128..138a86fbe9d7 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -57,6 +57,9 @@ static void do_nmi(struct pt_regs *regs);
static void do_unaligned_user(struct pt_regs *regs);
#endif
static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
static void do_debug(struct pt_regs *regs);
/*
@@ -69,7 +72,8 @@ static void do_debug(struct pt_regs *regs);
#define USER 0x02
#define COPROCESSOR(x) \
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor },\
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
typedef struct {
int cause;
@@ -327,6 +331,13 @@ static void do_unaligned_user(struct pt_regs *regs)
}
#endif
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+ coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
/* Handle debug events.
* When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
* preemption disabled to avoid rescheduling and keep mapping of hardware
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
end of thread, other threads:[~2022-04-21 10:12 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-21 10:10 [PATCH v2 00/10] xtensa: support coprocessors on SMP Max Filippov
2022-04-21 10:10 ` [PATCH v2 01/10] xtensa: clean up function declarations in traps.c Max Filippov
2022-04-21 10:10 ` [PATCH v2 02/10] xtensa: clean up exception handler prototypes Max Filippov
2022-04-21 10:10 ` [PATCH v2 03/10] xtensa: clean up declarations in coprocessor.h Max Filippov
2022-04-21 10:10 ` [PATCH v2 04/10] xtensa: clean up excsave1 initialization Max Filippov
2022-04-21 10:10 ` [PATCH v2 05/10] xtensa: use callx0 opcode in fast_coprocessor Max Filippov
2022-04-21 10:10 ` [PATCH v2 06/10] xtensa: handle coprocessor exceptions in kernel mode Max Filippov
2022-04-21 10:10 ` [PATCH v2 07/10] xtensa: add xtensa_xsr macro Max Filippov
2022-04-21 10:10 ` [PATCH v2 08/10] xtensa: merge SAVE_CP_REGS_TAB and LOAD_CP_REGS_TAB Max Filippov
2022-04-21 10:10 ` [PATCH v2 09/10] xtensa: get rid of stack frame in coprocessor_flush Max Filippov
2022-04-21 10:10 ` [PATCH v2 10/10] xtensa: support coprocessors on SMP Max Filippov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.