linux-mips.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
@ 2020-05-02  4:55 Huacai Chen
  2020-05-02  4:55 ` [PATCH V3 2/2] MIPS: Loongson-3: Calculate ra properly when unwinding the stack Huacai Chen
  2020-05-06  5:29 ` [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Jiaxun Yang
  0 siblings, 2 replies; 26+ messages in thread
From: Huacai Chen @ 2020-05-02  4:55 UTC (permalink / raw)
  To: Thomas Bogendoerfer
  Cc: linux-mips, Fuxin Zhang, Zhangjin Wu, Huacai Chen, Jiaxun Yang,
	Huacai Chen

Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
mode by default. However, gslq/gssq (16-bytes load/store instructions)
overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
gssq for optimization in kernel, we should enable COP2 usage in kernel.

Please pay attention that in this patch we only enable COP2 in kernel,
which means it will lose ST0_CU2 when a process go to user space (try
to use COP2 in user space will trigger an exception and then grab COP2,
which is similar to FPU). And as a result, we need to modify the context
switching code because the new scheduled process doesn't contain ST0_CU2
in its THERAD_STATUS probably.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
V3: Stop using ST0_MM and use ST0_CU2 instead (Thank Thomas and Maciej).

 arch/mips/boot/compressed/head.S   |  7 +++++++
 arch/mips/include/asm/stackframe.h | 12 +++++++++---
 arch/mips/kernel/head.S            | 18 +++++++++---------
 arch/mips/kernel/r4k_switch.S      |  3 +++
 arch/mips/kernel/traps.c           |  8 +++++++-
 5 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
index 409cb48..4580316 100644
--- a/arch/mips/boot/compressed/head.S
+++ b/arch/mips/boot/compressed/head.S
@@ -14,11 +14,18 @@
 
 #include <asm/asm.h>
 #include <asm/regdef.h>
+#include <asm/mipsregs.h>
 
 	.set noreorder
 	.cprestore
 	LEAF(start)
 start:
+#ifdef CONFIG_CPU_LOONGSON64
+	mfc0    t0, CP0_STATUS
+	or	t0, ST0_CU2   /* make 16-bytes load/store instructions usable */
+	mtc0    t0, CP0_STATUS
+#endif
+
 	/* Save boot rom start args */
 	move	s0, a0
 	move	s1, a1
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index 4d6ad90..c9ee7de 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -42,6 +42,12 @@
 	cfi_restore \reg \offset \docfi
 	.endm
 
+#ifdef CONFIG_CPU_LOONGSON64
+#define ST0_CUMASK (ST0_CU0 | ST0_CU2)
+#else
+#define ST0_CUMASK ST0_CU0
+#endif
+
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 #define STATMASK 0x3f
 #else
@@ -450,7 +456,7 @@
  */
 		.macro	CLI
 		mfc0	t0, CP0_STATUS
-		li	t1, ST0_CU0 | STATMASK
+		li	t1, ST0_CUMASK | STATMASK
 		or	t0, t1
 		xori	t0, STATMASK
 		mtc0	t0, CP0_STATUS
@@ -463,7 +469,7 @@
  */
 		.macro	STI
 		mfc0	t0, CP0_STATUS
-		li	t1, ST0_CU0 | STATMASK
+		li	t1, ST0_CUMASK | STATMASK
 		or	t0, t1
 		xori	t0, STATMASK & ~1
 		mtc0	t0, CP0_STATUS
@@ -477,7 +483,7 @@
  */
 		.macro	KMODE
 		mfc0	t0, CP0_STATUS
-		li	t1, ST0_CU0 | (STATMASK & ~1)
+		li	t1, ST0_CUMASK | (STATMASK & ~1)
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 		andi	t2, t0, ST0_IEP
 		srl	t2, 2
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 3b02ffe..bca6399 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -43,20 +43,20 @@
 	.set	pop
 	.endm
 
-	.macro	setup_c0_status_pri
-#ifdef CONFIG_64BIT
-	setup_c0_status ST0_KX 0
+#ifdef CONFIG_CPU_LOONGSON64
+#define ST0_SET ST0_KX | ST0_CU2
+#elif defined(CONFIG_64BIT)
+#define ST0_SET ST0_KX
 #else
-	setup_c0_status 0 0
+#define ST0_SET 0
 #endif
+
+	.macro	setup_c0_status_pri
+	setup_c0_status ST0_SET 0
 	.endm
 
 	.macro	setup_c0_status_sec
-#ifdef CONFIG_64BIT
-	setup_c0_status ST0_KX ST0_BEV
-#else
-	setup_c0_status 0 ST0_BEV
-#endif
+	setup_c0_status ST0_SET ST0_BEV
 	.endm
 
 #ifndef CONFIG_NO_EXCEPT_FILL
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 58232ae..c2fde40 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -53,6 +53,9 @@
 	nor	a3, $0, a3
 	and	a2, a3
 	or	a2, t1
+#ifdef CONFIG_CPU_LOONGSON64
+	or	a2, ST0_CU2   /* make 16-bytes load/store instructions usable */
+#endif
 	mtc0	a2, CP0_STATUS
 	move	v0, a0
 	jr	ra
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 31968cb..5efc525 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2108,9 +2108,15 @@ static void configure_status(void)
 	 * Disable coprocessors and select 32-bit or 64-bit addressing
 	 * and the 16/32 or 32/32 FPR register model.  Reset the BEV
 	 * flag that some firmware may have left set and the TS bit (for
-	 * IP27).  Set XX for ISA IV code to work.
+	 * IP27). Set XX for ISA IV code to work, and enable CU2 for
+	 * Loongson-3 to make 16-bytes load/store instructions usable.
 	 */
+#ifndef CONFIG_CPU_LOONGSON64
 	unsigned int status_set = ST0_CU0;
+#else
+	unsigned int status_set = ST0_CU0 | ST0_CU2;
+#endif
+
 #ifdef CONFIG_64BIT
 	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
 #endif
-- 
2.7.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V3 2/2] MIPS: Loongson-3: Calculate ra properly when unwinding the stack
  2020-05-02  4:55 [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Huacai Chen
@ 2020-05-02  4:55 ` Huacai Chen
  2020-05-06  5:29 ` [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Jiaxun Yang
  1 sibling, 0 replies; 26+ messages in thread
From: Huacai Chen @ 2020-05-02  4:55 UTC (permalink / raw)
  To: Thomas Bogendoerfer
  Cc: linux-mips, Fuxin Zhang, Zhangjin Wu, Huacai Chen, Jiaxun Yang,
	Huacai Chen

Loongson-3 has 16-bytes load/store instructions: gslq and gssq. This
patch calculate ra properly when unwinding the stack, if ra is saved
by gssq and restored by gslq.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/kernel/process.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index b2a7975..3993554 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -279,7 +279,21 @@ static inline int is_ra_save_ins(union mips_instruction *ip, int *poff)
 		*poff = ip->i_format.simmediate / sizeof(ulong);
 		return 1;
 	}
-
+#ifdef CONFIG_CPU_LOONGSON64
+	if ((ip->loongson3_lswc2_format.opcode == swc2_op) &&
+		      (ip->loongson3_lswc2_format.ls == 1) &&
+		      (ip->loongson3_lswc2_format.fr == 0) &&
+		      (ip->loongson3_lswc2_format.base == 29)) {
+		if (ip->loongson3_lswc2_format.rt == 31) {
+			*poff = ip->loongson3_lswc2_format.offset << 1;
+			return 1;
+		}
+		if (ip->loongson3_lswc2_format.rq == 31) {
+			*poff = (ip->loongson3_lswc2_format.offset << 1) + 1;
+			return 1;
+		}
+	}
+#endif
 	return 0;
 #endif
 }
-- 
2.7.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-05-02  4:55 [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Huacai Chen
  2020-05-02  4:55 ` [PATCH V3 2/2] MIPS: Loongson-3: Calculate ra properly when unwinding the stack Huacai Chen
@ 2020-05-06  5:29 ` Jiaxun Yang
  2020-08-01  7:59   ` Huacai Chen
  1 sibling, 1 reply; 26+ messages in thread
From: Jiaxun Yang @ 2020-05-06  5:29 UTC (permalink / raw)
  To: Huacai Chen, Thomas Bogendoerfer
  Cc: linux-mips, Fuxin Zhang, Zhangjin Wu, Huacai Chen



于 2020年5月2日 GMT+08:00 下午12:55:43, Huacai Chen <chenhc@lemote.com> 写到:
>Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
>mode by default. However, gslq/gssq (16-bytes load/store instructions)
>overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
>gssq for optimization in kernel, we should enable COP2 usage in kernel.
>
>Please pay attention that in this patch we only enable COP2 in kernel,
>which means it will lose ST0_CU2 when a process go to user space (try
>to use COP2 in user space will trigger an exception and then grab COP2,
>which is similar to FPU). And as a result, we need to modify the context
>switching code because the new scheduled process doesn't contain ST0_CU2
>in its THERAD_STATUS probably.
>
>Signed-off-by: Huacai Chen <chenhc@lemote.com>

Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>


>---
>V3: Stop using ST0_MM and use ST0_CU2 instead (Thank Thomas and Maciej).
>
> arch/mips/boot/compressed/head.S   |  7 +++++++
> arch/mips/include/asm/stackframe.h | 12 +++++++++---
> arch/mips/kernel/head.S            | 18 +++++++++---------
> arch/mips/kernel/r4k_switch.S      |  3 +++
> arch/mips/kernel/traps.c           |  8 +++++++-
> 5 files changed, 35 insertions(+), 13 deletions(-)
>
>diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
>index 409cb48..4580316 100644
>--- a/arch/mips/boot/compressed/head.S
>+++ b/arch/mips/boot/compressed/head.S
>@@ -14,11 +14,18 @@
> 
> #include <asm/asm.h>
> #include <asm/regdef.h>
>+#include <asm/mipsregs.h>
> 
> 	.set noreorder
> 	.cprestore
> 	LEAF(start)
> start:
>+#ifdef CONFIG_CPU_LOONGSON64
>+	mfc0    t0, CP0_STATUS
>+	or	t0, ST0_CU2   /* make 16-bytes load/store instructions usable */
>+	mtc0    t0, CP0_STATUS
>+#endif
>+
> 	/* Save boot rom start args */
> 	move	s0, a0
> 	move	s1, a1
>diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
>index 4d6ad90..c9ee7de 100644
>--- a/arch/mips/include/asm/stackframe.h
>+++ b/arch/mips/include/asm/stackframe.h
>@@ -42,6 +42,12 @@
> 	cfi_restore \reg \offset \docfi
> 	.endm
> 
>+#ifdef CONFIG_CPU_LOONGSON64
>+#define ST0_CUMASK (ST0_CU0 | ST0_CU2)
>+#else
>+#define ST0_CUMASK ST0_CU0
>+#endif
>+
> #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
> #define STATMASK 0x3f
> #else
>@@ -450,7 +456,7 @@
>  */
> 		.macro	CLI
> 		mfc0	t0, CP0_STATUS
>-		li	t1, ST0_CU0 | STATMASK
>+		li	t1, ST0_CUMASK | STATMASK
> 		or	t0, t1
> 		xori	t0, STATMASK
> 		mtc0	t0, CP0_STATUS
>@@ -463,7 +469,7 @@
>  */
> 		.macro	STI
> 		mfc0	t0, CP0_STATUS
>-		li	t1, ST0_CU0 | STATMASK
>+		li	t1, ST0_CUMASK | STATMASK
> 		or	t0, t1
> 		xori	t0, STATMASK & ~1
> 		mtc0	t0, CP0_STATUS
>@@ -477,7 +483,7 @@
>  */
> 		.macro	KMODE
> 		mfc0	t0, CP0_STATUS
>-		li	t1, ST0_CU0 | (STATMASK & ~1)
>+		li	t1, ST0_CUMASK | (STATMASK & ~1)
> #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
> 		andi	t2, t0, ST0_IEP
> 		srl	t2, 2
>diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
>index 3b02ffe..bca6399 100644
>--- a/arch/mips/kernel/head.S
>+++ b/arch/mips/kernel/head.S
>@@ -43,20 +43,20 @@
> 	.set	pop
> 	.endm
> 
>-	.macro	setup_c0_status_pri
>-#ifdef CONFIG_64BIT
>-	setup_c0_status ST0_KX 0
>+#ifdef CONFIG_CPU_LOONGSON64
>+#define ST0_SET ST0_KX | ST0_CU2
>+#elif defined(CONFIG_64BIT)
>+#define ST0_SET ST0_KX
> #else
>-	setup_c0_status 0 0
>+#define ST0_SET 0
> #endif
>+
>+	.macro	setup_c0_status_pri
>+	setup_c0_status ST0_SET 0
> 	.endm
> 
> 	.macro	setup_c0_status_sec
>-#ifdef CONFIG_64BIT
>-	setup_c0_status ST0_KX ST0_BEV
>-#else
>-	setup_c0_status 0 ST0_BEV
>-#endif
>+	setup_c0_status ST0_SET ST0_BEV
> 	.endm
> 
> #ifndef CONFIG_NO_EXCEPT_FILL
>diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
>index 58232ae..c2fde40 100644
>--- a/arch/mips/kernel/r4k_switch.S
>+++ b/arch/mips/kernel/r4k_switch.S
>@@ -53,6 +53,9 @@
> 	nor	a3, $0, a3
> 	and	a2, a3
> 	or	a2, t1
>+#ifdef CONFIG_CPU_LOONGSON64
>+	or	a2, ST0_CU2   /* make 16-bytes load/store instructions usable */
>+#endif
> 	mtc0	a2, CP0_STATUS
> 	move	v0, a0
> 	jr	ra
>diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
>index 31968cb..5efc525 100644
>--- a/arch/mips/kernel/traps.c
>+++ b/arch/mips/kernel/traps.c
>@@ -2108,9 +2108,15 @@ static void configure_status(void)
> 	 * Disable coprocessors and select 32-bit or 64-bit addressing
> 	 * and the 16/32 or 32/32 FPR register model.  Reset the BEV
> 	 * flag that some firmware may have left set and the TS bit (for
>-	 * IP27).  Set XX for ISA IV code to work.
>+	 * IP27). Set XX for ISA IV code to work, and enable CU2 for
>+	 * Loongson-3 to make 16-bytes load/store instructions usable.
> 	 */
>+#ifndef CONFIG_CPU_LOONGSON64
> 	unsigned int status_set = ST0_CU0;
>+#else
>+	unsigned int status_set = ST0_CU0 | ST0_CU2;
>+#endif
>+
> #ifdef CONFIG_64BIT
> 	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
> #endif

-- 
Jiaxun Yang

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-05-06  5:29 ` [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Jiaxun Yang
@ 2020-08-01  7:59   ` Huacai Chen
  2020-08-05 12:10     ` Thomas Bogendoerfer
  0 siblings, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2020-08-01  7:59 UTC (permalink / raw)
  To: Jiaxun Yang; +Cc: Thomas Bogendoerfer, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas,

On Wed, May 6, 2020 at 1:30 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
>
>
> 于 2020年5月2日 GMT+08:00 下午12:55:43, Huacai Chen <chenhc@lemote.com> 写到:
> >Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
> >mode by default. However, gslq/gssq (16-bytes load/store instructions)
> >overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
> >gssq for optimization in kernel, we should enable COP2 usage in kernel.
> >
> >Please pay attention that in this patch we only enable COP2 in kernel,
> >which means it will lose ST0_CU2 when a process go to user space (try
> >to use COP2 in user space will trigger an exception and then grab COP2,
> >which is similar to FPU). And as a result, we need to modify the context
> >switching code because the new scheduled process doesn't contain ST0_CU2
> >in its THERAD_STATUS probably.
> >
> >Signed-off-by: Huacai Chen <chenhc@lemote.com>
>
> Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>
Does this patch have some unresolved problems, or there is something unclear?

Huacai
>
> >---
> >V3: Stop using ST0_MM and use ST0_CU2 instead (Thank Thomas and Maciej).
> >
> > arch/mips/boot/compressed/head.S   |  7 +++++++
> > arch/mips/include/asm/stackframe.h | 12 +++++++++---
> > arch/mips/kernel/head.S            | 18 +++++++++---------
> > arch/mips/kernel/r4k_switch.S      |  3 +++
> > arch/mips/kernel/traps.c           |  8 +++++++-
> > 5 files changed, 35 insertions(+), 13 deletions(-)
> >
> >diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
> >index 409cb48..4580316 100644
> >--- a/arch/mips/boot/compressed/head.S
> >+++ b/arch/mips/boot/compressed/head.S
> >@@ -14,11 +14,18 @@
> >
> > #include <asm/asm.h>
> > #include <asm/regdef.h>
> >+#include <asm/mipsregs.h>
> >
> >       .set noreorder
> >       .cprestore
> >       LEAF(start)
> > start:
> >+#ifdef CONFIG_CPU_LOONGSON64
> >+      mfc0    t0, CP0_STATUS
> >+      or      t0, ST0_CU2   /* make 16-bytes load/store instructions usable */
> >+      mtc0    t0, CP0_STATUS
> >+#endif
> >+
> >       /* Save boot rom start args */
> >       move    s0, a0
> >       move    s1, a1
> >diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
> >index 4d6ad90..c9ee7de 100644
> >--- a/arch/mips/include/asm/stackframe.h
> >+++ b/arch/mips/include/asm/stackframe.h
> >@@ -42,6 +42,12 @@
> >       cfi_restore \reg \offset \docfi
> >       .endm
> >
> >+#ifdef CONFIG_CPU_LOONGSON64
> >+#define ST0_CUMASK (ST0_CU0 | ST0_CU2)
> >+#else
> >+#define ST0_CUMASK ST0_CU0
> >+#endif
> >+
> > #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
> > #define STATMASK 0x3f
> > #else
> >@@ -450,7 +456,7 @@
> >  */
> >               .macro  CLI
> >               mfc0    t0, CP0_STATUS
> >-              li      t1, ST0_CU0 | STATMASK
> >+              li      t1, ST0_CUMASK | STATMASK
> >               or      t0, t1
> >               xori    t0, STATMASK
> >               mtc0    t0, CP0_STATUS
> >@@ -463,7 +469,7 @@
> >  */
> >               .macro  STI
> >               mfc0    t0, CP0_STATUS
> >-              li      t1, ST0_CU0 | STATMASK
> >+              li      t1, ST0_CUMASK | STATMASK
> >               or      t0, t1
> >               xori    t0, STATMASK & ~1
> >               mtc0    t0, CP0_STATUS
> >@@ -477,7 +483,7 @@
> >  */
> >               .macro  KMODE
> >               mfc0    t0, CP0_STATUS
> >-              li      t1, ST0_CU0 | (STATMASK & ~1)
> >+              li      t1, ST0_CUMASK | (STATMASK & ~1)
> > #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
> >               andi    t2, t0, ST0_IEP
> >               srl     t2, 2
> >diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
> >index 3b02ffe..bca6399 100644
> >--- a/arch/mips/kernel/head.S
> >+++ b/arch/mips/kernel/head.S
> >@@ -43,20 +43,20 @@
> >       .set    pop
> >       .endm
> >
> >-      .macro  setup_c0_status_pri
> >-#ifdef CONFIG_64BIT
> >-      setup_c0_status ST0_KX 0
> >+#ifdef CONFIG_CPU_LOONGSON64
> >+#define ST0_SET ST0_KX | ST0_CU2
> >+#elif defined(CONFIG_64BIT)
> >+#define ST0_SET ST0_KX
> > #else
> >-      setup_c0_status 0 0
> >+#define ST0_SET 0
> > #endif
> >+
> >+      .macro  setup_c0_status_pri
> >+      setup_c0_status ST0_SET 0
> >       .endm
> >
> >       .macro  setup_c0_status_sec
> >-#ifdef CONFIG_64BIT
> >-      setup_c0_status ST0_KX ST0_BEV
> >-#else
> >-      setup_c0_status 0 ST0_BEV
> >-#endif
> >+      setup_c0_status ST0_SET ST0_BEV
> >       .endm
> >
> > #ifndef CONFIG_NO_EXCEPT_FILL
> >diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
> >index 58232ae..c2fde40 100644
> >--- a/arch/mips/kernel/r4k_switch.S
> >+++ b/arch/mips/kernel/r4k_switch.S
> >@@ -53,6 +53,9 @@
> >       nor     a3, $0, a3
> >       and     a2, a3
> >       or      a2, t1
> >+#ifdef CONFIG_CPU_LOONGSON64
> >+      or      a2, ST0_CU2   /* make 16-bytes load/store instructions usable */
> >+#endif
> >       mtc0    a2, CP0_STATUS
> >       move    v0, a0
> >       jr      ra
> >diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> >index 31968cb..5efc525 100644
> >--- a/arch/mips/kernel/traps.c
> >+++ b/arch/mips/kernel/traps.c
> >@@ -2108,9 +2108,15 @@ static void configure_status(void)
> >        * Disable coprocessors and select 32-bit or 64-bit addressing
> >        * and the 16/32 or 32/32 FPR register model.  Reset the BEV
> >        * flag that some firmware may have left set and the TS bit (for
> >-       * IP27).  Set XX for ISA IV code to work.
> >+       * IP27). Set XX for ISA IV code to work, and enable CU2 for
> >+       * Loongson-3 to make 16-bytes load/store instructions usable.
> >        */
> >+#ifndef CONFIG_CPU_LOONGSON64
> >       unsigned int status_set = ST0_CU0;
> >+#else
> >+      unsigned int status_set = ST0_CU0 | ST0_CU2;
> >+#endif
> >+
> > #ifdef CONFIG_64BIT
> >       status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
> > #endif
>
> --
> Jiaxun Yang

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-01  7:59   ` Huacai Chen
@ 2020-08-05 12:10     ` Thomas Bogendoerfer
  2020-08-05 13:51       ` Jiaxun Yang
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-05 12:10 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Sat, Aug 01, 2020 at 03:59:41PM +0800, Huacai Chen wrote:
> On Wed, May 6, 2020 at 1:30 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
> >
> >
> >
> > 于 2020年5月2日 GMT+08:00 下午12:55:43, Huacai Chen <chenhc@lemote.com> 写到:
> > >Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
> > >mode by default. However, gslq/gssq (16-bytes load/store instructions)
> > >overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
> > >gssq for optimization in kernel, we should enable COP2 usage in kernel.
> > >
> > >Please pay attention that in this patch we only enable COP2 in kernel,
> > >which means it will lose ST0_CU2 when a process go to user space (try
> > >to use COP2 in user space will trigger an exception and then grab COP2,
> > >which is similar to FPU). And as a result, we need to modify the context
> > >switching code because the new scheduled process doesn't contain ST0_CU2
> > >in its THERAD_STATUS probably.
> > >
> > >Signed-off-by: Huacai Chen <chenhc@lemote.com>
> >
> > Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> >
> Does this patch have some unresolved problems, or there is something unclear?

yes there is. Since this COP2 is a total black box to me, it would be
really helpfull to get some docs for it or at least some information what
it exactly does and how you want to use it in kernel code.

Looking closer at the patch I realized, that there is already support
for usage of COP2 in user land, which I thought isn't the case (at least
I understood that from one of your mails). So is there enough state
saving to support this ?

And finally what I stil don't like is the splittering of more
#ifdef LOONGSON into common code. I'd prefer a more generic way
to enable COPx for in kernel usage. Maybe a more generic config option
or a dynamic solution like the one for user land.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-05 12:10     ` Thomas Bogendoerfer
@ 2020-08-05 13:51       ` Jiaxun Yang
  2020-08-06  1:15         ` Huacai Chen
  2020-08-07 13:13         ` Thomas Bogendoerfer
  0 siblings, 2 replies; 26+ messages in thread
From: Jiaxun Yang @ 2020-08-05 13:51 UTC (permalink / raw)
  To: Thomas Bogendoerfer, Huacai Chen; +Cc: open list:MIPS, Fuxin Zhang, Zhangjin Wu



在 2020/8/5 20:10, Thomas Bogendoerfer 写道:
> On Sat, Aug 01, 2020 at 03:59:41PM +0800, Huacai Chen wrote:
>> On Wed, May 6, 2020 at 1:30 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>>>
>>>
>>> 于 2020年5月2日 GMT+08:00 下午12:55:43, Huacai Chen <chenhc@lemote.com> 写到:
>>>> Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
>>>> mode by default. However, gslq/gssq (16-bytes load/store instructions)
>>>> overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
>>>> gssq for optimization in kernel, we should enable COP2 usage in kernel.
>>>>
>>>> Please pay attention that in this patch we only enable COP2 in kernel,
>>>> which means it will lose ST0_CU2 when a process go to user space (try
>>>> to use COP2 in user space will trigger an exception and then grab COP2,
>>>> which is similar to FPU). And as a result, we need to modify the context
>>>> switching code because the new scheduled process doesn't contain ST0_CU2
>>>> in its THERAD_STATUS probably.
>>>>
>>>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>>> Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>>>
>> Does this patch have some unresolved problems, or there is something unclear?
> yes there is. Since this COP2 is a total black box to me, it would be
> really helpfull to get some docs for it or at least some information what
> it exactly does and how you want to use it in kernel code.

FYI:
Loongson doesn't have any CU2 register. It just reused LWC2 & LDC2 opcode
to define some load & store instructions (e.g. 128bit load to two GPRs).

I have a collection of these instructions here[1].

 From GS464E (3A2000+), execuating these instruction won't produce COP2 
unusable
exception. But older Loongson cores (GS464) will still produce COP2 
exception, thus
we should have CU2 enabled in kernel. That would allow us use to these 
instructions
to optimize kernel.

>
> Looking closer at the patch I realized, that there is already support
> for usage of COP2 in user land, which I thought isn't the case (at least
> I understood that from one of your mails). So is there enough state
> saving to support this ?

Actually there is no CU2 state for Loongson to save.

>
> And finally what I stil don't like is the splittering of more
> #ifdef LOONGSON into common code. I'd prefer a more generic way
> to enable COPx for in kernel usage. Maybe a more generic config option
> or a dynamic solution like the one for user land.
Agreed. some Kconfig options or cpuinfo_mips.options can be helpful.

Thanks.

- Jiaxun

[1]: https://github.com/FlyGoat/loongson-insn/
>
> Thomas.
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-05 13:51       ` Jiaxun Yang
@ 2020-08-06  1:15         ` Huacai Chen
  2020-08-07 13:13         ` Thomas Bogendoerfer
  1 sibling, 0 replies; 26+ messages in thread
From: Huacai Chen @ 2020-08-06  1:15 UTC (permalink / raw)
  To: Jiaxun Yang; +Cc: Thomas Bogendoerfer, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Jiaxun

On Thu, Aug 6, 2020 at 1:39 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
>
>
> 在 2020/8/5 20:10, Thomas Bogendoerfer 写道:
> > On Sat, Aug 01, 2020 at 03:59:41PM +0800, Huacai Chen wrote:
> >> On Wed, May 6, 2020 at 1:30 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
> >>>
> >>>
> >>> 于 2020年5月2日 GMT+08:00 下午12:55:43, Huacai Chen <chenhc@lemote.com> 写到:
> >>>> Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
> >>>> mode by default. However, gslq/gssq (16-bytes load/store instructions)
> >>>> overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
> >>>> gssq for optimization in kernel, we should enable COP2 usage in kernel.
> >>>>
> >>>> Please pay attention that in this patch we only enable COP2 in kernel,
> >>>> which means it will lose ST0_CU2 when a process go to user space (try
> >>>> to use COP2 in user space will trigger an exception and then grab COP2,
> >>>> which is similar to FPU). And as a result, we need to modify the context
> >>>> switching code because the new scheduled process doesn't contain ST0_CU2
> >>>> in its THERAD_STATUS probably.
> >>>>
> >>>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> >>> Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> >>>
> >> Does this patch have some unresolved problems, or there is something unclear?
> > yes there is. Since this COP2 is a total black box to me, it would be
> > really helpfull to get some docs for it or at least some information what
> > it exactly does and how you want to use it in kernel code.
>
> FYI:
> Loongson doesn't have any CU2 register. It just reused LWC2 & LDC2 opcode
> to define some load & store instructions (e.g. 128bit load to two GPRs).
>
> I have a collection of these instructions here[1].
>
>  From GS464E (3A2000+), execuating these instruction won't produce COP2
> unusable
> exception. But older Loongson cores (GS464) will still produce COP2
> exception, thus
> we should have CU2 enabled in kernel. That would allow us use to these
> instructions
> to optimize kernel.
>
> >
> > Looking closer at the patch I realized, that there is already support
> > for usage of COP2 in user land, which I thought isn't the case (at least
> > I understood that from one of your mails). So is there enough state
> > saving to support this ?
>
> Actually there is no CU2 state for Loongson to save.
>
> >
> > And finally what I stil don't like is the splittering of more
> > #ifdef LOONGSON into common code. I'd prefer a more generic way
> > to enable COPx for in kernel usage. Maybe a more generic config option
> > or a dynamic solution like the one for user land.
> Agreed. some Kconfig options or cpuinfo_mips.options can be helpful.
>
So make those codes depend on CONFIG_CPU_LOONGSON3_WORKAROUNDS instead
of CONFIG_CPU_LOONGSON64?

Huacai

> Thanks.
>
> - Jiaxun
>
> [1]: https://github.com/FlyGoat/loongson-insn/
> >
> > Thomas.
> >

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-05 13:51       ` Jiaxun Yang
  2020-08-06  1:15         ` Huacai Chen
@ 2020-08-07 13:13         ` Thomas Bogendoerfer
  2020-08-07 13:25           ` Jiaxun Yang
  1 sibling, 1 reply; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-07 13:13 UTC (permalink / raw)
  To: Jiaxun Yang; +Cc: Huacai Chen, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Wed, Aug 05, 2020 at 09:51:44PM +0800, Jiaxun Yang wrote:
> >yes there is. Since this COP2 is a total black box to me, it would be
> >really helpfull to get some docs for it or at least some information what
> >it exactly does and how you want to use it in kernel code.
> 
> FYI:
> Loongson doesn't have any CU2 register. It just reused LWC2 & LDC2 opcode
> to define some load & store instructions (e.g. 128bit load to two GPRs).
> 
> I have a collection of these instructions here[1].
> 
> From GS464E (3A2000+), execuating these instruction won't produce COP2
> unusable
> exception. But older Loongson cores (GS464) will still produce COP2
> exception, thus
> we should have CU2 enabled in kernel. That would allow us use to these
> instructions
> to optimize kernel.

thank you that makes things a little bit clearer.

How will this be used in kernel code ? Special assembler routines or
by enabling gcc options ?

> >And finally what I stil don't like is the splittering of more
> >#ifdef LOONGSON into common code. I'd prefer a more generic way
> >to enable COPx for in kernel usage. Maybe a more generic config option
> >or a dynamic solution like the one for user land.
> Agreed. some Kconfig options or cpuinfo_mips.options can be helpful.

let's see whether this really is needed.

To me it looks like the COP2 exception support for loongson makes
thing worse than it helps. How about the patch below ? There is still
a gap between starting the kernel and COP2 enabled for which I'm not
sure, if we are hitting COP2 instructions.

Thomas.

diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h
index 6b7396a6a115..dfa72e9be64a 100644
--- a/arch/mips/include/asm/cop2.h
+++ b/arch/mips/include/asm/cop2.h
@@ -33,13 +33,6 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *);
 #define cop2_present		1
 #define cop2_lazy_restore	0
 
-#elif defined(CONFIG_CPU_LOONGSON64)
-
-#define cop2_present		1
-#define cop2_lazy_restore	1
-#define cop2_save(r)		do { (void)(r); } while (0)
-#define cop2_restore(r)		do { (void)(r); } while (0)
-
 #else
 
 #define cop2_present		0
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index b95ef98fc847..f0a8ef5a8605 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2194,6 +2194,11 @@ static void configure_status(void)
 #ifdef CONFIG_64BIT
 	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
 #endif
+#ifdef CONFIG_CPU_LOONGSON64
+	/* enable 16-bytes load/store instructions */
+	status_set |= ST0_CU2;
+#endif
+
 	if (current_cpu_data.isa_level & MIPS_CPU_ISA_IV)
 		status_set |= ST0_XX;
 	if (cpu_has_dsp)



-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-07 13:13         ` Thomas Bogendoerfer
@ 2020-08-07 13:25           ` Jiaxun Yang
  2020-08-07 13:36             ` Thomas Bogendoerfer
  2020-08-09 14:53             ` Jiaxun Yang
  0 siblings, 2 replies; 26+ messages in thread
From: Jiaxun Yang @ 2020-08-07 13:25 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Huacai Chen, open list:MIPS, Fuxin Zhang, Zhangjin Wu



在 2020/8/7 21:13, Thomas Bogendoerfer 写道:
> On Wed, Aug 05, 2020 at 09:51:44PM +0800, Jiaxun Yang wrote:
>>> yes there is. Since this COP2 is a total black box to me, it would be
>>> really helpfull to get some docs for it or at least some information what
>>> it exactly does and how you want to use it in kernel code.
>> FYI:
>> Loongson doesn't have any CU2 register. It just reused LWC2 & LDC2 opcode
>> to define some load & store instructions (e.g. 128bit load to two GPRs).
>>
>> I have a collection of these instructions here[1].
>>
>>  From GS464E (3A2000+), execuating these instruction won't produce COP2
>> unusable
>> exception. But older Loongson cores (GS464) will still produce COP2
>> exception, thus
>> we should have CU2 enabled in kernel. That would allow us use to these
>> instructions
>> to optimize kernel.
> thank you that makes things a little bit clearer.
>
> How will this be used in kernel code ? Special assembler routines or
> by enabling gcc options ?

Via special assembly routines, as -msoft-float will disable generation of
these instructions in GCC.

I knew Huacai have out-of-tree memcpy optimization and Xuerui have
RAID5 optimiztion with these instructions.

>
>>> And finally what I stil don't like is the splittering of more
>>> #ifdef LOONGSON into common code. I'd prefer a more generic way
>>> to enable COPx for in kernel usage. Maybe a more generic config option
>>> or a dynamic solution like the one for user land.
>> Agreed. some Kconfig options or cpuinfo_mips.options can be helpful.
> let's see whether this really is needed.
>
> To me it looks like the COP2 exception support for loongson makes
> thing worse than it helps. How about the patch below ? There is still
> a gap between starting the kernel and COP2 enabled for which I'm not
> sure, if we are hitting COP2 instructions.

Yes, the exception does not really make sense.
What's your opinion Huacai?

For in-kernel usage, we still have to enable it in genex.

Thanks for your advice~

- Jiaxun

>
> Thomas.
>
> diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h
> index 6b7396a6a115..dfa72e9be64a 100644
> --- a/arch/mips/include/asm/cop2.h
> +++ b/arch/mips/include/asm/cop2.h
> @@ -33,13 +33,6 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *);
>   #define cop2_present		1
>   #define cop2_lazy_restore	0
>   
> -#elif defined(CONFIG_CPU_LOONGSON64)
> -
> -#define cop2_present		1
> -#define cop2_lazy_restore	1
> -#define cop2_save(r)		do { (void)(r); } while (0)
> -#define cop2_restore(r)		do { (void)(r); } while (0)
> -
>   #else
>   
>   #define cop2_present		0
> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> index b95ef98fc847..f0a8ef5a8605 100644
> --- a/arch/mips/kernel/traps.c
> +++ b/arch/mips/kernel/traps.c
> @@ -2194,6 +2194,11 @@ static void configure_status(void)
>   #ifdef CONFIG_64BIT
>   	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
>   #endif
> +#ifdef CONFIG_CPU_LOONGSON64
> +	/* enable 16-bytes load/store instructions */
> +	status_set |= ST0_CU2;
> +#endif
> +
>   	if (current_cpu_data.isa_level & MIPS_CPU_ISA_IV)
>   		status_set |= ST0_XX;
>   	if (cpu_has_dsp)
>
>
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-07 13:25           ` Jiaxun Yang
@ 2020-08-07 13:36             ` Thomas Bogendoerfer
  2020-08-09 14:53             ` Jiaxun Yang
  1 sibling, 0 replies; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-07 13:36 UTC (permalink / raw)
  To: Jiaxun Yang; +Cc: Huacai Chen, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Fri, Aug 07, 2020 at 09:25:25PM +0800, Jiaxun Yang wrote:
> 
> 
> 在 2020/8/7 21:13, Thomas Bogendoerfer 写道:
> >On Wed, Aug 05, 2020 at 09:51:44PM +0800, Jiaxun Yang wrote:
> >>>yes there is. Since this COP2 is a total black box to me, it would be
> >>>really helpfull to get some docs for it or at least some information what
> >>>it exactly does and how you want to use it in kernel code.
> >>FYI:
> >>Loongson doesn't have any CU2 register. It just reused LWC2 & LDC2 opcode
> >>to define some load & store instructions (e.g. 128bit load to two GPRs).
> >>
> >>I have a collection of these instructions here[1].
> >>
> >> From GS464E (3A2000+), execuating these instruction won't produce COP2
> >>unusable
> >>exception. But older Loongson cores (GS464) will still produce COP2
> >>exception, thus
> >>we should have CU2 enabled in kernel. That would allow us use to these
> >>instructions
> >>to optimize kernel.
> >thank you that makes things a little bit clearer.
> >
> >How will this be used in kernel code ? Special assembler routines or
> >by enabling gcc options ?
> 
> Via special assembly routines, as -msoft-float will disable generation of
> these instructions in GCC.
> 
> I knew Huacai have out-of-tree memcpy optimization and Xuerui have
> RAID5 optimiztion with these instructions.
> 
> >
> >>>And finally what I stil don't like is the splittering of more
> >>>#ifdef LOONGSON into common code. I'd prefer a more generic way
> >>>to enable COPx for in kernel usage. Maybe a more generic config option
> >>>or a dynamic solution like the one for user land.
> >>Agreed. some Kconfig options or cpuinfo_mips.options can be helpful.
> >let's see whether this really is needed.
> >
> >To me it looks like the COP2 exception support for loongson makes
> >thing worse than it helps. How about the patch below ? There is still
> >a gap between starting the kernel and COP2 enabled for which I'm not
> >sure, if we are hitting COP2 instructions.
> 
> Yes, the exception does not really make sense.
> What's your opinion Huacai?
> 
> For in-kernel usage, we still have to enable it in genex.

you shouldn't need any other changes. ST0_CU2 will be set in
configure_status() and not touched after that.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-07 13:25           ` Jiaxun Yang
  2020-08-07 13:36             ` Thomas Bogendoerfer
@ 2020-08-09 14:53             ` Jiaxun Yang
  2020-08-10 14:12               ` Thomas Bogendoerfer
  1 sibling, 1 reply; 26+ messages in thread
From: Jiaxun Yang @ 2020-08-09 14:53 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Huacai Chen, open list:MIPS, Fuxin Zhang, Zhangjin Wu



在 2020/8/7 下午9:25, Jiaxun Yang 写道:
>
>
> 在 2020/8/7 21:13, Thomas Bogendoerfer 写道:
>> On Wed, Aug 05, 2020 at 09:51:44PM +0800, Jiaxun Yang wrote:
>>>> yes there is. Since this COP2 is a total black box to me, it would be
>>>> really helpfull to get some docs for it or at least some 
>>>> information what
>>>> it exactly does and how you want to use it in kernel code.
>>> FYI:
>>> Loongson doesn't have any CU2 register. It just reused LWC2 & LDC2 
>>> opcode
>>> to define some load & store instructions (e.g. 128bit load to two 
>>> GPRs).
>>>
>>> I have a collection of these instructions here[1].
>>>
>>>  From GS464E (3A2000+), execuating these instruction won't produce COP2
>>> unusable
>>> exception. But older Loongson cores (GS464) will still produce COP2
>>> exception, thus
>>> we should have CU2 enabled in kernel. That would allow us use to these
>>> instructions
>>> to optimize kernel.
>> thank you that makes things a little bit clearer.
>>
>> How will this be used in kernel code ? Special assembler routines or
>> by enabling gcc options ?
>
> Via special assembly routines, as -msoft-float will disable generation of
> these instructions in GCC.
>
> I knew Huacai have out-of-tree memcpy optimization and Xuerui have
> RAID5 optimiztion with these instructions.
>
>>
>>>> And finally what I stil don't like is the splittering of more
>>>> #ifdef LOONGSON into common code. I'd prefer a more generic way
>>>> to enable COPx for in kernel usage. Maybe a more generic config option
>>>> or a dynamic solution like the one for user land.
>>> Agreed. some Kconfig options or cpuinfo_mips.options can be helpful.
>> let's see whether this really is needed.
>>
>> To me it looks like the COP2 exception support for loongson makes
>> thing worse than it helps. How about the patch below ? There is still
>> a gap between starting the kernel and COP2 enabled for which I'm not
>> sure, if we are hitting COP2 instructions.
>

I had a off-list discussion with Huacai and found it's not the case.

Some Loongson CU2 instructions (e.g. GSLQC1) uses FPU registers, and now 
we're uncoditionally
let the thread own FPU in cop2-ex handler when CU2 exception triggered.
However, if we enable CU2 all the time, than the FPU context of these 
instructions might be lost.
(Yes, these instructions won't generate CU1 unusable exception when CU2 
is enabled but not CU1,
it is likely to be a design fault.)

Thus we still need to enable CU2 with exception for user space, and we 
can always enable CU2 in
kernel since kernel won't be compiled with hard-float. :-)

Thanks.

- Jiaxun

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-09 14:53             ` Jiaxun Yang
@ 2020-08-10 14:12               ` Thomas Bogendoerfer
  2020-08-11  2:16                 ` Jiaxun Yang
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-10 14:12 UTC (permalink / raw)
  To: Jiaxun Yang; +Cc: Huacai Chen, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Sun, Aug 09, 2020 at 10:53:13PM +0800, Jiaxun Yang wrote:
> Thus we still need to enable CU2 with exception for user space, and we can
> always enable CU2 in
> kernel since kernel won't be compiled with hard-float. :-)

I see, how about the patch below

Thomas.


diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 4ddc12e4444a..f7144116b43b 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -389,6 +389,13 @@
 #define ST0_CU3			0x80000000
 #define ST0_XX			0x80000000	/* MIPS IV naming */
 
+/* in-kernel enabled CUs */
+#ifdef CONFIG_CPU_LOONGSOON64
+#define ST0_KERNEL_CUMASK	(ST0_CU0 | ST_CU2)
+#else
+#define ST0_KERNEL_CUMASK	ST0_CU0
+#endif
+
 /*
  * Bitfields and bit numbers in the coprocessor 0 IntCtl register. (MIPSR2)
  */
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index 3e8d2aaf96af..aa430a6c68b2 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -450,7 +450,7 @@
  */
 		.macro	CLI
 		mfc0	t0, CP0_STATUS
-		li	t1, ST0_CU0 | STATMASK
+		li	t1, ST0_KERNEL_CUMASK | STATMASK
 		or	t0, t1
 		xori	t0, STATMASK
 		mtc0	t0, CP0_STATUS
@@ -463,7 +463,7 @@
  */
 		.macro	STI
 		mfc0	t0, CP0_STATUS
-		li	t1, ST0_CU0 | STATMASK
+		li	t1, ST0_KERNEL_CUMASK | STATMASK
 		or	t0, t1
 		xori	t0, STATMASK & ~1
 		mtc0	t0, CP0_STATUS
@@ -477,7 +477,7 @@
  */
 		.macro	KMODE
 		mfc0	t0, CP0_STATUS
-		li	t1, ST0_CU0 | (STATMASK & ~1)
+		li	t1, ST0_KERNEL_CUMASK | (STATMASK & ~1)
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 		andi	t2, t0, ST0_IEP
 		srl	t2, 2
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 7dd234e788e6..61b73580b877 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -35,7 +35,7 @@
 	.macro	setup_c0_status set clr
 	.set	push
 	mfc0	t0, CP0_STATUS
-	or	t0, ST0_CU0|\set|0x1f|\clr
+	or	t0, ST0_KERNEL_CUMASK|\set|0x1f|\clr
 	xor	t0, 0x1f|\clr
 	mtc0	t0, CP0_STATUS
 	.set	noreorder
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ff5320b79100..90b869297893 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -68,7 +68,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
 	unsigned long status;
 
 	/* New thread loses kernel privileges. */
-	status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
+	status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_CU2|ST0_FR|KU_MASK);
 	status |= KU_USER;
 	regs->cp0_status = status;
 	lose_fpu(0);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index b95ef98fc847..f4362ac172c6 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2190,7 +2190,7 @@ static void configure_status(void)
 	 * flag that some firmware may have left set and the TS bit (for
 	 * IP27).  Set XX for ISA IV code to work.
 	 */
-	unsigned int status_set = ST0_CU0;
+	unsigned int status_set = ST0_KERNEL_CUMASK;
 #ifdef CONFIG_64BIT
 	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
 #endif

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-10 14:12               ` Thomas Bogendoerfer
@ 2020-08-11  2:16                 ` Jiaxun Yang
  2020-08-11  6:45                   ` Huacai Chen
  0 siblings, 1 reply; 26+ messages in thread
From: Jiaxun Yang @ 2020-08-11  2:16 UTC (permalink / raw)
  To: Thomas Bogendoerfer, Huacai Chen; +Cc: open list:MIPS, Fuxin Zhang, Zhangjin Wu



在 2020/8/10 22:12, Thomas Bogendoerfer 写道:
> On Sun, Aug 09, 2020 at 10:53:13PM +0800, Jiaxun Yang wrote:
>> Thus we still need to enable CU2 with exception for user space, and we can
>> always enable CU2 in
>> kernel since kernel won't be compiled with hard-float. :-)
> I see, how about the patch below
That looks fine for me.
Is it good with you, Huacai?

Thanks.

- Jiaxun

>
> Thomas.
>
>
> diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
> index 4ddc12e4444a..f7144116b43b 100644
> --- a/arch/mips/include/asm/mipsregs.h
> +++ b/arch/mips/include/asm/mipsregs.h
> @@ -389,6 +389,13 @@
>   #define ST0_CU3			0x80000000
>   #define ST0_XX			0x80000000	/* MIPS IV naming */
>   
> +/* in-kernel enabled CUs */
> +#ifdef CONFIG_CPU_LOONGSOON64
> +#define ST0_KERNEL_CUMASK	(ST0_CU0 | ST_CU2)
> +#else
> +#define ST0_KERNEL_CUMASK	ST0_CU0
> +#endif
> +
>   /*
>    * Bitfields and bit numbers in the coprocessor 0 IntCtl register. (MIPSR2)
>    */
> diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
> index 3e8d2aaf96af..aa430a6c68b2 100644
> --- a/arch/mips/include/asm/stackframe.h
> +++ b/arch/mips/include/asm/stackframe.h
> @@ -450,7 +450,7 @@
>    */
>   		.macro	CLI
>   		mfc0	t0, CP0_STATUS
> -		li	t1, ST0_CU0 | STATMASK
> +		li	t1, ST0_KERNEL_CUMASK | STATMASK
>   		or	t0, t1
>   		xori	t0, STATMASK
>   		mtc0	t0, CP0_STATUS
> @@ -463,7 +463,7 @@
>    */
>   		.macro	STI
>   		mfc0	t0, CP0_STATUS
> -		li	t1, ST0_CU0 | STATMASK
> +		li	t1, ST0_KERNEL_CUMASK | STATMASK
>   		or	t0, t1
>   		xori	t0, STATMASK & ~1
>   		mtc0	t0, CP0_STATUS
> @@ -477,7 +477,7 @@
>    */
>   		.macro	KMODE
>   		mfc0	t0, CP0_STATUS
> -		li	t1, ST0_CU0 | (STATMASK & ~1)
> +		li	t1, ST0_KERNEL_CUMASK | (STATMASK & ~1)
>   #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
>   		andi	t2, t0, ST0_IEP
>   		srl	t2, 2
> diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
> index 7dd234e788e6..61b73580b877 100644
> --- a/arch/mips/kernel/head.S
> +++ b/arch/mips/kernel/head.S
> @@ -35,7 +35,7 @@
>   	.macro	setup_c0_status set clr
>   	.set	push
>   	mfc0	t0, CP0_STATUS
> -	or	t0, ST0_CU0|\set|0x1f|\clr
> +	or	t0, ST0_KERNEL_CUMASK|\set|0x1f|\clr
>   	xor	t0, 0x1f|\clr
>   	mtc0	t0, CP0_STATUS
>   	.set	noreorder
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index ff5320b79100..90b869297893 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -68,7 +68,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
>   	unsigned long status;
>   
>   	/* New thread loses kernel privileges. */
> -	status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
> +	status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_CU2|ST0_FR|KU_MASK);
>   	status |= KU_USER;
>   	regs->cp0_status = status;
>   	lose_fpu(0);
> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> index b95ef98fc847..f4362ac172c6 100644
> --- a/arch/mips/kernel/traps.c
> +++ b/arch/mips/kernel/traps.c
> @@ -2190,7 +2190,7 @@ static void configure_status(void)
>   	 * flag that some firmware may have left set and the TS bit (for
>   	 * IP27).  Set XX for ISA IV code to work.
>   	 */
> -	unsigned int status_set = ST0_CU0;
> +	unsigned int status_set = ST0_KERNEL_CUMASK;
>   #ifdef CONFIG_64BIT
>   	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
>   #endif
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-11  2:16                 ` Jiaxun Yang
@ 2020-08-11  6:45                   ` Huacai Chen
  2020-08-11 12:06                     ` Thomas Bogendoerfer
  0 siblings, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2020-08-11  6:45 UTC (permalink / raw)
  To: Jiaxun Yang; +Cc: Thomas Bogendoerfer, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas and Jiaxun,

On Tue, Aug 11, 2020 at 10:18 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
>
>
> 在 2020/8/10 22:12, Thomas Bogendoerfer 写道:
> > On Sun, Aug 09, 2020 at 10:53:13PM +0800, Jiaxun Yang wrote:
> >> Thus we still need to enable CU2 with exception for user space, and we can
> >> always enable CU2 in
> >> kernel since kernel won't be compiled with hard-float. :-)
> > I see, how about the patch below
> That looks fine for me.
> Is it good with you, Huacai?

There are two problems:
1, zboot (arch/mips/boot/compressed/head.S) should be considered,
because the initial value of Status may or may not contain CU2.
2, r4k_switch.S should set CU2 for the new process, otherwise it
cannot use gslq/gssq while it in kernel (Because the new process
doesn't contain CU2 in
THERAD_STATUS. Though a process sets CU2 when it enters kernel, but it
only sets CU2 in hardware, not in THREAD_STATUS).

Huacai

>
> Thanks.
>
> - Jiaxun
>
> >
> > Thomas.
> >
> >
> > diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
> > index 4ddc12e4444a..f7144116b43b 100644
> > --- a/arch/mips/include/asm/mipsregs.h
> > +++ b/arch/mips/include/asm/mipsregs.h
> > @@ -389,6 +389,13 @@
> >   #define ST0_CU3                     0x80000000
> >   #define ST0_XX                      0x80000000      /* MIPS IV naming */
> >
> > +/* in-kernel enabled CUs */
> > +#ifdef CONFIG_CPU_LOONGSOON64
> > +#define ST0_KERNEL_CUMASK    (ST0_CU0 | ST_CU2)
> > +#else
> > +#define ST0_KERNEL_CUMASK    ST0_CU0
> > +#endif
> > +
> >   /*
> >    * Bitfields and bit numbers in the coprocessor 0 IntCtl register. (MIPSR2)
> >    */
> > diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
> > index 3e8d2aaf96af..aa430a6c68b2 100644
> > --- a/arch/mips/include/asm/stackframe.h
> > +++ b/arch/mips/include/asm/stackframe.h
> > @@ -450,7 +450,7 @@
> >    */
> >               .macro  CLI
> >               mfc0    t0, CP0_STATUS
> > -             li      t1, ST0_CU0 | STATMASK
> > +             li      t1, ST0_KERNEL_CUMASK | STATMASK
> >               or      t0, t1
> >               xori    t0, STATMASK
> >               mtc0    t0, CP0_STATUS
> > @@ -463,7 +463,7 @@
> >    */
> >               .macro  STI
> >               mfc0    t0, CP0_STATUS
> > -             li      t1, ST0_CU0 | STATMASK
> > +             li      t1, ST0_KERNEL_CUMASK | STATMASK
> >               or      t0, t1
> >               xori    t0, STATMASK & ~1
> >               mtc0    t0, CP0_STATUS
> > @@ -477,7 +477,7 @@
> >    */
> >               .macro  KMODE
> >               mfc0    t0, CP0_STATUS
> > -             li      t1, ST0_CU0 | (STATMASK & ~1)
> > +             li      t1, ST0_KERNEL_CUMASK | (STATMASK & ~1)
> >   #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
> >               andi    t2, t0, ST0_IEP
> >               srl     t2, 2
> > diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
> > index 7dd234e788e6..61b73580b877 100644
> > --- a/arch/mips/kernel/head.S
> > +++ b/arch/mips/kernel/head.S
> > @@ -35,7 +35,7 @@
> >       .macro  setup_c0_status set clr
> >       .set    push
> >       mfc0    t0, CP0_STATUS
> > -     or      t0, ST0_CU0|\set|0x1f|\clr
> > +     or      t0, ST0_KERNEL_CUMASK|\set|0x1f|\clr
> >       xor     t0, 0x1f|\clr
> >       mtc0    t0, CP0_STATUS
> >       .set    noreorder
> > diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> > index ff5320b79100..90b869297893 100644
> > --- a/arch/mips/kernel/process.c
> > +++ b/arch/mips/kernel/process.c
> > @@ -68,7 +68,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
> >       unsigned long status;
> >
> >       /* New thread loses kernel privileges. */
> > -     status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
> > +     status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_CU2|ST0_FR|KU_MASK);
> >       status |= KU_USER;
> >       regs->cp0_status = status;
> >       lose_fpu(0);
> > diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> > index b95ef98fc847..f4362ac172c6 100644
> > --- a/arch/mips/kernel/traps.c
> > +++ b/arch/mips/kernel/traps.c
> > @@ -2190,7 +2190,7 @@ static void configure_status(void)
> >        * flag that some firmware may have left set and the TS bit (for
> >        * IP27).  Set XX for ISA IV code to work.
> >        */
> > -     unsigned int status_set = ST0_CU0;
> > +     unsigned int status_set = ST0_KERNEL_CUMASK;
> >   #ifdef CONFIG_64BIT
> >       status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
> >   #endif
> >

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-11  6:45                   ` Huacai Chen
@ 2020-08-11 12:06                     ` Thomas Bogendoerfer
  2020-08-14  9:44                       ` Huacai Chen
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-11 12:06 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Tue, Aug 11, 2020 at 02:45:05PM +0800, Huacai Chen wrote:
> Hi, Thomas and Jiaxun,
> 
> On Tue, Aug 11, 2020 at 10:18 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
> >
> >
> >
> > 在 2020/8/10 22:12, Thomas Bogendoerfer 写道:
> > > On Sun, Aug 09, 2020 at 10:53:13PM +0800, Jiaxun Yang wrote:
> > >> Thus we still need to enable CU2 with exception for user space, and we can
> > >> always enable CU2 in
> > >> kernel since kernel won't be compiled with hard-float. :-)
> > > I see, how about the patch below
> > That looks fine for me.
> > Is it good with you, Huacai?
> 
> There are two problems:
> 1, zboot (arch/mips/boot/compressed/head.S) should be considered,
> because the initial value of Status may or may not contain CU2.

this comes with it's own memcpy/memset and stuff, I don't see a reason why
COP2 needs to be enabled there,

> 2, r4k_switch.S should set CU2 for the new process, otherwise it
> cannot use gslq/gssq while it in kernel (Because the new process
> doesn't contain CU2 in THERAD_STATUS.

which is correct for all user space process, otherwise the whole
cop2 exception thing wouldn't work. And if cop2 exception handling
has been run it's set in THREAD_STATUS.

> Though a process sets CU2 when it enters kernel, but it
> only sets CU2 in hardware, not in THREAD_STATUS).

A kernel thread will get THREAD_STATUS from current running kernel code,
at least that's how I read this code:

       if (unlikely(p->flags & PF_KTHREAD)) {
                /* kernel thread */
                unsigned long status = p->thread.cp0_status;
                memset(childregs, 0, sizeof(struct pt_regs));
                ti->addr_limit = KERNEL_DS;
                p->thread.reg16 = usp; /* fn */
                p->thread.reg17 = kthread_arg;
                p->thread.reg29 = childksp;
                p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
                status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
                         ((status & (ST0_KUC | ST0_IEC)) << 2);
#else
                status |= ST0_EXL;
#endif
                childregs->cp0_status = status;
                return 0;
        }

If there is still something missing, I want to find the root cause
and not paper over it in r4k_switch.S and IMHO break COP2 handling for
loongsoon completely.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-11 12:06                     ` Thomas Bogendoerfer
@ 2020-08-14  9:44                       ` Huacai Chen
  2020-08-14 13:16                         ` Jiaxun Yang
  2020-08-26 12:46                         ` Thomas Bogendoerfer
  0 siblings, 2 replies; 26+ messages in thread
From: Huacai Chen @ 2020-08-14  9:44 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas,

On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Tue, Aug 11, 2020 at 02:45:05PM +0800, Huacai Chen wrote:
> > Hi, Thomas and Jiaxun,
> >
> > On Tue, Aug 11, 2020 at 10:18 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
> > >
> > >
> > >
> > > 在 2020/8/10 22:12, Thomas Bogendoerfer 写道:
> > > > On Sun, Aug 09, 2020 at 10:53:13PM +0800, Jiaxun Yang wrote:
> > > >> Thus we still need to enable CU2 with exception for user space, and we can
> > > >> always enable CU2 in
> > > >> kernel since kernel won't be compiled with hard-float. :-)
> > > > I see, how about the patch below
> > > That looks fine for me.
> > > Is it good with you, Huacai?
> >
> > There are two problems:
> > 1, zboot (arch/mips/boot/compressed/head.S) should be considered,
> > because the initial value of Status may or may not contain CU2.
>
> this comes with it's own memcpy/memset and stuff, I don't see a reason why
> COP2 needs to be enabled there,
gslq/gssq can also be generated by toolchains.

>
> > 2, r4k_switch.S should set CU2 for the new process, otherwise it
> > cannot use gslq/gssq while it in kernel (Because the new process
> > doesn't contain CU2 in THERAD_STATUS.
>
> which is correct for all user space process, otherwise the whole
> cop2 exception thing wouldn't work. And if cop2 exception handling
> has been run it's set in THREAD_STATUS.
>
THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
when a process runs in kernel-space. KSTK_STATUS (what you have seen
in copy_thread_tls() below) means cp0_status in a process's kernel
stack, which saves the cp0_status when a process runs in user-space.
Whether COP2 exception can work depends on that KSTK_STATUS (but not
THREAD_STATUS) should not contain CU2 at the first time. So, whether
or not THREAD_STATUS contains CU2, it won't break COP2 handling.

Huacai
> > Though a process sets CU2 when it enters kernel, but it
> > only sets CU2 in hardware, not in THREAD_STATUS).
>
> A kernel thread will get THREAD_STATUS from current running kernel code,
> at least that's how I read this code:
>
>        if (unlikely(p->flags & PF_KTHREAD)) {
>                 /* kernel thread */
>                 unsigned long status = p->thread.cp0_status;
>                 memset(childregs, 0, sizeof(struct pt_regs));
>                 ti->addr_limit = KERNEL_DS;
>                 p->thread.reg16 = usp; /* fn */
>                 p->thread.reg17 = kthread_arg;
>                 p->thread.reg29 = childksp;
>                 p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
> #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
>                 status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
>                          ((status & (ST0_KUC | ST0_IEC)) << 2);
> #else
>                 status |= ST0_EXL;
> #endif
>                 childregs->cp0_status = status;
>                 return 0;
>         }
>
> If there is still something missing, I want to find the root cause
> and not paper over it in r4k_switch.S and IMHO break COP2 handling for
> loongsoon completely.
>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-14  9:44                       ` Huacai Chen
@ 2020-08-14 13:16                         ` Jiaxun Yang
  2020-08-26 12:46                         ` Thomas Bogendoerfer
  1 sibling, 0 replies; 26+ messages in thread
From: Jiaxun Yang @ 2020-08-14 13:16 UTC (permalink / raw)
  To: Huacai Chen, Thomas Bogendoerfer; +Cc: open list:MIPS, Fuxin Zhang, Zhangjin Wu



在 2020/8/14 下午5:44, Huacai Chen 写道:
> Hi, Thomas,
>
> On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
>> On Tue, Aug 11, 2020 at 02:45:05PM +0800, Huacai Chen wrote:
>>> Hi, Thomas and Jiaxun,
>>>
>>> On Tue, Aug 11, 2020 at 10:18 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>>>>
>>>>
>>>> 在 2020/8/10 22:12, Thomas Bogendoerfer 写道:
>>>>> On Sun, Aug 09, 2020 at 10:53:13PM +0800, Jiaxun Yang wrote:
>>>>>> Thus we still need to enable CU2 with exception for user space, and we can
>>>>>> always enable CU2 in
>>>>>> kernel since kernel won't be compiled with hard-float. :-)
>>>>> I see, how about the patch below
>>>> That looks fine for me.
>>>> Is it good with you, Huacai?
>>> There are two problems:
>>> 1, zboot (arch/mips/boot/compressed/head.S) should be considered,
>>> because the initial value of Status may or may not contain CU2.
>> this comes with it's own memcpy/memset and stuff, I don't see a reason why
>> COP2 needs to be enabled there,
> gslq/gssq can also be generated by toolchains.

To correct myself, GSLQ/GSSQ won't be generated by current upstream GCC
with msoft-float but it is possible to happen with Loongson's GCC 4.9/7 
fork.

And I think we should care this case as well....

Thanks.

- Jiaxun

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-14  9:44                       ` Huacai Chen
  2020-08-14 13:16                         ` Jiaxun Yang
@ 2020-08-26 12:46                         ` Thomas Bogendoerfer
  2020-08-27  3:48                           ` Huacai Chen
  2020-09-02  6:54                           ` Huacai Chen
  1 sibling, 2 replies; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-26 12:46 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > COP2 needs to be enabled there,
> gslq/gssq can also be generated by toolchains.

I don't want to introduce every single CPU optimization bits into such
a closed first stage loader. So please use $(filter-out) in
arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.

> > which is correct for all user space process, otherwise the whole
> > cop2 exception thing wouldn't work. And if cop2 exception handling
> > has been run it's set in THREAD_STATUS.
> >
> THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> when a process runs in kernel-space. KSTK_STATUS (what you have seen
> in copy_thread_tls() below) means cp0_status in a process's kernel
> stack, which saves the cp0_status when a process runs in user-space.
> Whether COP2 exception can work depends on that KSTK_STATUS (but not
> THREAD_STATUS) should not contain CU2 at the first time. So, whether
> or not THREAD_STATUS contains CU2, it won't break COP2 handling.

so why don't we fix the the in-kernel cp0_status instead ?

How about this ?

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 90b869297893..26fb77a8d406 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
        /*  Put the stack after the struct pt_regs.  */
        childksp = (unsigned long) childregs;
        p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
+       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
        if (unlikely(p->flags & PF_KTHREAD)) {
                /* kernel thread */
                unsigned long status = p->thread.cp0_status;

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-26 12:46                         ` Thomas Bogendoerfer
@ 2020-08-27  3:48                           ` Huacai Chen
  2020-08-28  8:42                             ` Thomas Bogendoerfer
  2020-09-02  6:54                           ` Huacai Chen
  1 sibling, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2020-08-27  3:48 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas,

On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > <tsbogend@alpha.franken.de> wrote:
> > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > COP2 needs to be enabled there,
> > gslq/gssq can also be generated by toolchains.
>
> I don't want to introduce every single CPU optimization bits into such
> a closed first stage loader. So please use $(filter-out) in
> arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
>
> > > which is correct for all user space process, otherwise the whole
> > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > has been run it's set in THREAD_STATUS.
> > >
> > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > in copy_thread_tls() below) means cp0_status in a process's kernel
> > stack, which saves the cp0_status when a process runs in user-space.
> > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
>
> so why don't we fix the the in-kernel cp0_status instead ?
>
> How about this ?
>
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index 90b869297893..26fb77a8d406 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
>         /*  Put the stack after the struct pt_regs.  */
>         childksp = (unsigned long) childregs;
>         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
>         if (unlikely(p->flags & PF_KTHREAD)) {
>                 /* kernel thread */
>                 unsigned long status = p->thread.cp0_status;
This seems a good idea, I will send a new version.

Huacai
>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-27  3:48                           ` Huacai Chen
@ 2020-08-28  8:42                             ` Thomas Bogendoerfer
  2020-08-28  9:21                               ` Huacai Chen
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-28  8:42 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Thu, Aug 27, 2020 at 11:48:29AM +0800, Huacai Chen wrote:
> Hi, Thomas,
> 
> On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> >
> > On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > > <tsbogend@alpha.franken.de> wrote:
> > > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > > COP2 needs to be enabled there,
> > > gslq/gssq can also be generated by toolchains.
> >
> > I don't want to introduce every single CPU optimization bits into such
> > a closed first stage loader. So please use $(filter-out) in
> > arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
> >
> > > > which is correct for all user space process, otherwise the whole
> > > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > > has been run it's set in THREAD_STATUS.
> > > >
> > > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > > in copy_thread_tls() below) means cp0_status in a process's kernel
> > > stack, which saves the cp0_status when a process runs in user-space.
> > > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
> >
> > so why don't we fix the the in-kernel cp0_status instead ?
> >
> > How about this ?
> >
> > diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> > index 90b869297893..26fb77a8d406 100644
> > --- a/arch/mips/kernel/process.c
> > +++ b/arch/mips/kernel/process.c
> > @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
> >         /*  Put the stack after the struct pt_regs.  */
> >         childksp = (unsigned long) childregs;
> >         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> > +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
> >         if (unlikely(p->flags & PF_KTHREAD)) {
> >                 /* kernel thread */
> >                 unsigned long status = p->thread.cp0_status;
> This seems a good idea, I will send a new version.

IMHO it migt be even better to just use

p->thread.cp0_status = read_c0_status();

without masking. We are in kernel, so we took care of whatever CU1/CU2
handling had to be done at kernel entry. So keeping the current cp0 status
for the new thread looks more sane to me.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-28  8:42                             ` Thomas Bogendoerfer
@ 2020-08-28  9:21                               ` Huacai Chen
  2020-08-28  9:33                                 ` Thomas Bogendoerfer
  0 siblings, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2020-08-28  9:21 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas,

On Fri, Aug 28, 2020 at 4:43 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Thu, Aug 27, 2020 at 11:48:29AM +0800, Huacai Chen wrote:
> > Hi, Thomas,
> >
> > On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
> > <tsbogend@alpha.franken.de> wrote:
> > >
> > > On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > > > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > > > <tsbogend@alpha.franken.de> wrote:
> > > > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > > > COP2 needs to be enabled there,
> > > > gslq/gssq can also be generated by toolchains.
> > >
> > > I don't want to introduce every single CPU optimization bits into such
> > > a closed first stage loader. So please use $(filter-out) in
> > > arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
> > >
> > > > > which is correct for all user space process, otherwise the whole
> > > > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > > > has been run it's set in THREAD_STATUS.
> > > > >
> > > > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > > > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > > > in copy_thread_tls() below) means cp0_status in a process's kernel
> > > > stack, which saves the cp0_status when a process runs in user-space.
> > > > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > > > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > > > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
> > >
> > > so why don't we fix the the in-kernel cp0_status instead ?
> > >
> > > How about this ?
> > >
> > > diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> > > index 90b869297893..26fb77a8d406 100644
> > > --- a/arch/mips/kernel/process.c
> > > +++ b/arch/mips/kernel/process.c
> > > @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
> > >         /*  Put the stack after the struct pt_regs.  */
> > >         childksp = (unsigned long) childregs;
> > >         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> > > +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
> > >         if (unlikely(p->flags & PF_KTHREAD)) {
> > >                 /* kernel thread */
> > >                 unsigned long status = p->thread.cp0_status;
> > This seems a good idea, I will send a new version.
>
> IMHO it migt be even better to just use
>
> p->thread.cp0_status = read_c0_status();
>
> without masking. We are in kernel, so we took care of whatever CU1/CU2
> handling had to be done at kernel entry. So keeping the current cp0 status
> for the new thread looks more sane to me.
I think this may cause FPU be enabled in kernel by mistake.

>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-28  9:21                               ` Huacai Chen
@ 2020-08-28  9:33                                 ` Thomas Bogendoerfer
  2020-08-28  9:52                                   ` Huacai Chen
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-28  9:33 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Fri, Aug 28, 2020 at 05:21:19PM +0800, Huacai Chen wrote:
> Hi, Thomas,
> 
> On Fri, Aug 28, 2020 at 4:43 PM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> >
> > On Thu, Aug 27, 2020 at 11:48:29AM +0800, Huacai Chen wrote:
> > > Hi, Thomas,
> > >
> > > On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
> > > <tsbogend@alpha.franken.de> wrote:
> > > >
> > > > On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > > > > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > > > > <tsbogend@alpha.franken.de> wrote:
> > > > > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > > > > COP2 needs to be enabled there,
> > > > > gslq/gssq can also be generated by toolchains.
> > > >
> > > > I don't want to introduce every single CPU optimization bits into such
> > > > a closed first stage loader. So please use $(filter-out) in
> > > > arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
> > > >
> > > > > > which is correct for all user space process, otherwise the whole
> > > > > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > > > > has been run it's set in THREAD_STATUS.
> > > > > >
> > > > > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > > > > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > > > > in copy_thread_tls() below) means cp0_status in a process's kernel
> > > > > stack, which saves the cp0_status when a process runs in user-space.
> > > > > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > > > > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > > > > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
> > > >
> > > > so why don't we fix the the in-kernel cp0_status instead ?
> > > >
> > > > How about this ?
> > > >
> > > > diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> > > > index 90b869297893..26fb77a8d406 100644
> > > > --- a/arch/mips/kernel/process.c
> > > > +++ b/arch/mips/kernel/process.c
> > > > @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
> > > >         /*  Put the stack after the struct pt_regs.  */
> > > >         childksp = (unsigned long) childregs;
> > > >         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> > > > +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
> > > >         if (unlikely(p->flags & PF_KTHREAD)) {
> > > >                 /* kernel thread */
> > > >                 unsigned long status = p->thread.cp0_status;
> > > This seems a good idea, I will send a new version.
> >
> > IMHO it migt be even better to just use
> >
> > p->thread.cp0_status = read_c0_status();
> >
> > without masking. We are in kernel, so we took care of whatever CU1/CU2
> > handling had to be done at kernel entry. So keeping the current cp0 status
> > for the new thread looks more sane to me.
> I think this may cause FPU be enabled in kernel by mistake.

if it is enabled at that point, it was already enabled in kernel.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-28  9:33                                 ` Thomas Bogendoerfer
@ 2020-08-28  9:52                                   ` Huacai Chen
  2020-08-28 11:12                                     ` Thomas Bogendoerfer
  0 siblings, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2020-08-28  9:52 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas,

On Fri, Aug 28, 2020 at 5:34 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Fri, Aug 28, 2020 at 05:21:19PM +0800, Huacai Chen wrote:
> > Hi, Thomas,
> >
> > On Fri, Aug 28, 2020 at 4:43 PM Thomas Bogendoerfer
> > <tsbogend@alpha.franken.de> wrote:
> > >
> > > On Thu, Aug 27, 2020 at 11:48:29AM +0800, Huacai Chen wrote:
> > > > Hi, Thomas,
> > > >
> > > > On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
> > > > <tsbogend@alpha.franken.de> wrote:
> > > > >
> > > > > On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > > > > > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > > > > > <tsbogend@alpha.franken.de> wrote:
> > > > > > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > > > > > COP2 needs to be enabled there,
> > > > > > gslq/gssq can also be generated by toolchains.
> > > > >
> > > > > I don't want to introduce every single CPU optimization bits into such
> > > > > a closed first stage loader. So please use $(filter-out) in
> > > > > arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
> > > > >
> > > > > > > which is correct for all user space process, otherwise the whole
> > > > > > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > > > > > has been run it's set in THREAD_STATUS.
> > > > > > >
> > > > > > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > > > > > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > > > > > in copy_thread_tls() below) means cp0_status in a process's kernel
> > > > > > stack, which saves the cp0_status when a process runs in user-space.
> > > > > > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > > > > > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > > > > > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
> > > > >
> > > > > so why don't we fix the the in-kernel cp0_status instead ?
> > > > >
> > > > > How about this ?
> > > > >
> > > > > diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> > > > > index 90b869297893..26fb77a8d406 100644
> > > > > --- a/arch/mips/kernel/process.c
> > > > > +++ b/arch/mips/kernel/process.c
> > > > > @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
> > > > >         /*  Put the stack after the struct pt_regs.  */
> > > > >         childksp = (unsigned long) childregs;
> > > > >         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> > > > > +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
> > > > >         if (unlikely(p->flags & PF_KTHREAD)) {
> > > > >                 /* kernel thread */
> > > > >                 unsigned long status = p->thread.cp0_status;
> > > > This seems a good idea, I will send a new version.
> > >
> > > IMHO it migt be even better to just use
> > >
> > > p->thread.cp0_status = read_c0_status();
> > >
> > > without masking. We are in kernel, so we took care of whatever CU1/CU2
> > > handling had to be done at kernel entry. So keeping the current cp0 status
> > > for the new thread looks more sane to me.
> > I think this may cause FPU be enabled in kernel by mistake.
>
> if it is enabled at that point, it was already enabled in kernel.
In kernel FPU may be enabled temporarily, and it seems a preemptible
kernel may enable FPU for a new process (maybe I'm wrong, this is a
bit complex).

Huacai

>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-28  9:52                                   ` Huacai Chen
@ 2020-08-28 11:12                                     ` Thomas Bogendoerfer
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-08-28 11:12 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Fri, Aug 28, 2020 at 05:52:05PM +0800, Huacai Chen wrote:
> Hi, Thomas,
> 
> On Fri, Aug 28, 2020 at 5:34 PM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> > if it is enabled at that point, it was already enabled in kernel.
> In kernel FPU may be enabled temporarily, and it seems a preemptible
> kernel may enable FPU for a new process (maybe I'm wrong, this is a
> bit complex).

no you are completly right. We need to clear CU1 in this place otherwise
a orphan cpu warning will trigger. It's enabled, if the current thread owns
it, but the newly created thread doesn't.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-08-26 12:46                         ` Thomas Bogendoerfer
  2020-08-27  3:48                           ` Huacai Chen
@ 2020-09-02  6:54                           ` Huacai Chen
  2020-09-03  8:43                             ` Thomas Bogendoerfer
  1 sibling, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2020-09-02  6:54 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

Hi, Thomas,

On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > <tsbogend@alpha.franken.de> wrote:
> > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > COP2 needs to be enabled there,
> > gslq/gssq can also be generated by toolchains.
>
> I don't want to introduce every single CPU optimization bits into such
> a closed first stage loader. So please use $(filter-out) in
> arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
>
> > > which is correct for all user space process, otherwise the whole
> > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > has been run it's set in THREAD_STATUS.
> > >
> > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > in copy_thread_tls() below) means cp0_status in a process's kernel
> > stack, which saves the cp0_status when a process runs in user-space.
> > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
>
> so why don't we fix the the in-kernel cp0_status instead ?
>
> How about this ?
>
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index 90b869297893..26fb77a8d406 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
>         /*  Put the stack after the struct pt_regs.  */
>         childksp = (unsigned long) childregs;
>         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
>         if (unlikely(p->flags & PF_KTHREAD)) {
>                 /* kernel thread */
>                 unsigned long status = p->thread.cp0_status;
I tried this way but it doesn't work, the reason is that the resume
routine in r4k_switch.S save the current hardware status into
THREAD_STATUS, but CU2 in hardware is cleared in its caller (i.e.,
switch_to). However, I will send V5 to use ST0_KERNEL_CUMASK in all
possible places to avoid #ifdefs.

Huacai

>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel
  2020-09-02  6:54                           ` Huacai Chen
@ 2020-09-03  8:43                             ` Thomas Bogendoerfer
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Bogendoerfer @ 2020-09-03  8:43 UTC (permalink / raw)
  To: Huacai Chen; +Cc: Jiaxun Yang, open list:MIPS, Fuxin Zhang, Zhangjin Wu

On Wed, Sep 02, 2020 at 02:54:10PM +0800, Huacai Chen wrote:
> Hi, Thomas,
> 
> On Wed, Aug 26, 2020 at 8:48 PM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> >
> > On Fri, Aug 14, 2020 at 05:44:18PM +0800, Huacai Chen wrote:
> > > On Tue, Aug 11, 2020 at 8:08 PM Thomas Bogendoerfer
> > > <tsbogend@alpha.franken.de> wrote:
> > > > this comes with it's own memcpy/memset and stuff, I don't see a reason why
> > > > COP2 needs to be enabled there,
> > > gslq/gssq can also be generated by toolchains.
> >
> > I don't want to introduce every single CPU optimization bits into such
> > a closed first stage loader. So please use $(filter-out) in
> > arch/mips/boot/compressed/Makefile to disable creation of 16byte load/stores.
> >
> > > > which is correct for all user space process, otherwise the whole
> > > > cop2 exception thing wouldn't work. And if cop2 exception handling
> > > > has been run it's set in THREAD_STATUS.
> > > >
> > > THREAD_STATUS means thread_struct.cp0_status, which is the cp0_status
> > > when a process runs in kernel-space. KSTK_STATUS (what you have seen
> > > in copy_thread_tls() below) means cp0_status in a process's kernel
> > > stack, which saves the cp0_status when a process runs in user-space.
> > > Whether COP2 exception can work depends on that KSTK_STATUS (but not
> > > THREAD_STATUS) should not contain CU2 at the first time. So, whether
> > > or not THREAD_STATUS contains CU2, it won't break COP2 handling.
> >
> > so why don't we fix the the in-kernel cp0_status instead ?
> >
> > How about this ?
> >
> > diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> > index 90b869297893..26fb77a8d406 100644
> > --- a/arch/mips/kernel/process.c
> > +++ b/arch/mips/kernel/process.c
> > @@ -133,6 +133,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
> >         /*  Put the stack after the struct pt_regs.  */
> >         childksp = (unsigned long) childregs;
> >         p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
> > +       p->thread.cp0_status |= ST0_KERNEL_CUMASK;
> >         if (unlikely(p->flags & PF_KTHREAD)) {
> >                 /* kernel thread */
> >                 unsigned long status = p->thread.cp0_status;
> I tried this way but it doesn't work, the reason is that the resume
> routine in r4k_switch.S save the current hardware status into
> THREAD_STATUS, but CU2 in hardware is cleared in its caller (i.e.,
> switch_to).

so let's fix it there:

diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0b0a93bf83cd..a4374b4cb88f 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -117,6 +117,8 @@ do {                                                                        \
                __restore_dsp(next);                                    \
        }                                                               \
        if (cop2_present) {                                             \
+               u32 status = read_c0_status();                          \
+                                                                       \
                set_c0_status(ST0_CU2);                                 \
                if ((KSTK_STATUS(prev) & ST0_CU2)) {                    \
                        if (cop2_lazy_restore)                          \
@@ -127,7 +129,7 @@ do {                                                                        \
                    !cop2_lazy_restore) {                               \
                        cop2_restore(next);                             \
                }                                                       \
-               clear_c0_status(ST0_CU2);                               \
+               write_c0_status(status);                                \
        }                                                               \
        __clear_r5_hw_ll_bit();                                         \
        __clear_software_ll_bit();                                      \

BTW. if we come up to a final solution, this change should be a seperate
patch. And the change in process.c probably, too.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2020-09-03  8:43 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-02  4:55 [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Huacai Chen
2020-05-02  4:55 ` [PATCH V3 2/2] MIPS: Loongson-3: Calculate ra properly when unwinding the stack Huacai Chen
2020-05-06  5:29 ` [PATCH V3 1/2] MIPS: Loongson-3: Enable COP2 usage in kernel Jiaxun Yang
2020-08-01  7:59   ` Huacai Chen
2020-08-05 12:10     ` Thomas Bogendoerfer
2020-08-05 13:51       ` Jiaxun Yang
2020-08-06  1:15         ` Huacai Chen
2020-08-07 13:13         ` Thomas Bogendoerfer
2020-08-07 13:25           ` Jiaxun Yang
2020-08-07 13:36             ` Thomas Bogendoerfer
2020-08-09 14:53             ` Jiaxun Yang
2020-08-10 14:12               ` Thomas Bogendoerfer
2020-08-11  2:16                 ` Jiaxun Yang
2020-08-11  6:45                   ` Huacai Chen
2020-08-11 12:06                     ` Thomas Bogendoerfer
2020-08-14  9:44                       ` Huacai Chen
2020-08-14 13:16                         ` Jiaxun Yang
2020-08-26 12:46                         ` Thomas Bogendoerfer
2020-08-27  3:48                           ` Huacai Chen
2020-08-28  8:42                             ` Thomas Bogendoerfer
2020-08-28  9:21                               ` Huacai Chen
2020-08-28  9:33                                 ` Thomas Bogendoerfer
2020-08-28  9:52                                   ` Huacai Chen
2020-08-28 11:12                                     ` Thomas Bogendoerfer
2020-09-02  6:54                           ` Huacai Chen
2020-09-03  8:43                             ` Thomas Bogendoerfer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).