All of lore.kernel.org
 help / color / mirror / Atom feed
From: guoren@kernel.org
To: paul.walmsley@sifive.com, palmer@dabbelt.com, guoren@kernel.org,
	panqinglin2020@iscas.ac.cn, bjorn@rivosinc.com,
	conor.dooley@microchip.com, leobras@redhat.com,
	peterz@infradead.org, anup@brainfault.org, keescook@chromium.org,
	wuwei2016@iscas.ac.cn, xiaoguang.xing@sophgo.com,
	chao.wei@sophgo.com, unicorn_wang@outlook.com, uwu@icenowy.me,
	jszhang@kernel.org, wefu@redhat.com, atishp@atishpatra.org
Cc: linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
	Guo Ren <guoren@linux.alibaba.com>
Subject: [PATCH V12 04/14] riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
Date: Mon, 25 Dec 2023 07:58:37 -0500	[thread overview]
Message-ID: <20231225125847.2778638-5-guoren@kernel.org> (raw)
In-Reply-To: <20231225125847.2778638-1-guoren@kernel.org>

From: Guo Ren <guoren@linux.alibaba.com>

The early version of T-Head C9xx cores has a store merge buffer
delay problem. The store merge buffer could improve the store queue
performance by merging multi-store requests, but when there are not
continued store requests, the prior single store request would be
waiting in the store queue for a long time. That would cause
significant problems for communication between multi-cores. This
problem was found on sg2042 & th1520 platforms with the qspinlock
lock torture test.

So appending a fence w.o could immediately flush the store merge
buffer and let other cores see the write result.

This will apply the WRITE_ONCE errata to handle the non-standard
behavior via appending a fence w.o instruction for WRITE_ONCE().

Reviewed-by: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/Kconfig.errata              | 19 ++++++++++++++++
 arch/riscv/errata/thead/errata.c       | 20 +++++++++++++++++
 arch/riscv/include/asm/rwonce.h        | 31 ++++++++++++++++++++++++++
 arch/riscv/include/asm/vendorid_list.h |  3 ++-
 include/asm-generic/rwonce.h           |  2 ++
 5 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/rwonce.h

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index e2c731cfed8c..2824ff165741 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -99,4 +99,23 @@ config ERRATA_THEAD_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_WRITE_ONCE
+	bool "Apply T-Head WRITE_ONCE errata"
+	depends on ERRATA_THEAD
+	default y
+	help
+	  The early version of T-Head C9xx cores of sg2042 has a store merge
+	  buffer delay problem. The store merge buffer could improve the store
+	  queue performance by merging multi-store requests, but when there are
+	  no continued store requests, the prior single store request would be
+	  waiting in the store queue for a long time. That would cause signifi-
+	  cant problems for communication between multi-cores. Appending a
+	  fence w.o could immediately flush the store merge buffer and let other
+	  cores see the write result.
+
+	  This will apply the WRITE_ONCE errata to handle the non-standard beh-
+	  avior via appending a fence w.o instruction for WRITE_ONCE().
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 0554ed4bf087..f6c1da819670 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_write_once(unsigned int stage,
+				    unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_BOOT ||
+	    stage == RISCV_ALTERNATIVES_MODULE)
+		return true;
+
+	return false;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
 
+	if (errata_probe_write_once(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
new file mode 100644
index 000000000000..4c407c482ed0
--- /dev/null
+++ b/arch/riscv/include/asm/rwonce.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+#include <asm/vendorid_list.h>
+
+#if defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE)
+#define write_once_flush()				\
+do {							\
+	asm volatile(ALTERNATIVE(			\
+		__nops(1),				\
+		"fence w, o\n\t",			\
+		THEAD_VENDOR_ID,			\
+		ERRATA_THEAD_WRITE_ONCE,		\
+		CONFIG_ERRATA_THEAD_WRITE_ONCE)		\
+		: : : "memory");			\
+} while (0)
+
+#define __WRITE_ONCE(x, val)				\
+do {							\
+	*(volatile typeof(x) *)&(x) = (val);		\
+	write_once_flush();				\
+} while (0)
+#endif
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_RWONCE_H */
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index c503373193d2..5df1862bf0c9 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -24,7 +24,8 @@
 #define	ERRATA_THEAD_PBMT 0
 #define	ERRATA_THEAD_CMO 1
 #define	ERRATA_THEAD_PMU 2
-#define	ERRATA_THEAD_NUMBER 3
+#define	ERRATA_THEAD_WRITE_ONCE 3
+#define	ERRATA_THEAD_NUMBER 4
 #endif
 
 #endif
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
index 8d0a6280e982..fb07fe8c6e45 100644
--- a/include/asm-generic/rwonce.h
+++ b/include/asm-generic/rwonce.h
@@ -50,10 +50,12 @@
 	__READ_ONCE(x);							\
 })
 
+#ifndef __WRITE_ONCE
 #define __WRITE_ONCE(x, val)						\
 do {									\
 	*(volatile typeof(x) *)&(x) = (val);				\
 } while (0)
+#endif
 
 #define WRITE_ONCE(x, val)						\
 do {									\
-- 
2.40.1


WARNING: multiple messages have this Message-ID (diff)
From: guoren@kernel.org
To: paul.walmsley@sifive.com, palmer@dabbelt.com, guoren@kernel.org,
	panqinglin2020@iscas.ac.cn, bjorn@rivosinc.com,
	conor.dooley@microchip.com, leobras@redhat.com,
	peterz@infradead.org, anup@brainfault.org, keescook@chromium.org,
	wuwei2016@iscas.ac.cn, xiaoguang.xing@sophgo.com,
	chao.wei@sophgo.com, unicorn_wang@outlook.com, uwu@icenowy.me,
	jszhang@kernel.org, wefu@redhat.com, atishp@atishpatra.org
Cc: linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
	Guo Ren <guoren@linux.alibaba.com>
Subject: [PATCH V12 04/14] riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
Date: Mon, 25 Dec 2023 07:58:37 -0500	[thread overview]
Message-ID: <20231225125847.2778638-5-guoren@kernel.org> (raw)
In-Reply-To: <20231225125847.2778638-1-guoren@kernel.org>

From: Guo Ren <guoren@linux.alibaba.com>

The early version of T-Head C9xx cores has a store merge buffer
delay problem. The store merge buffer could improve the store queue
performance by merging multi-store requests, but when there are not
continued store requests, the prior single store request would be
waiting in the store queue for a long time. That would cause
significant problems for communication between multi-cores. This
problem was found on sg2042 & th1520 platforms with the qspinlock
lock torture test.

So appending a fence w.o could immediately flush the store merge
buffer and let other cores see the write result.

This will apply the WRITE_ONCE errata to handle the non-standard
behavior via appending a fence w.o instruction for WRITE_ONCE().

Reviewed-by: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/Kconfig.errata              | 19 ++++++++++++++++
 arch/riscv/errata/thead/errata.c       | 20 +++++++++++++++++
 arch/riscv/include/asm/rwonce.h        | 31 ++++++++++++++++++++++++++
 arch/riscv/include/asm/vendorid_list.h |  3 ++-
 include/asm-generic/rwonce.h           |  2 ++
 5 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/rwonce.h

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index e2c731cfed8c..2824ff165741 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -99,4 +99,23 @@ config ERRATA_THEAD_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_WRITE_ONCE
+	bool "Apply T-Head WRITE_ONCE errata"
+	depends on ERRATA_THEAD
+	default y
+	help
+	  The early version of T-Head C9xx cores of sg2042 has a store merge
+	  buffer delay problem. The store merge buffer could improve the store
+	  queue performance by merging multi-store requests, but when there are
+	  no continued store requests, the prior single store request would be
+	  waiting in the store queue for a long time. That would cause signifi-
+	  cant problems for communication between multi-cores. Appending a
+	  fence w.o could immediately flush the store merge buffer and let other
+	  cores see the write result.
+
+	  This will apply the WRITE_ONCE errata to handle the non-standard beh-
+	  avior via appending a fence w.o instruction for WRITE_ONCE().
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 0554ed4bf087..f6c1da819670 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_write_once(unsigned int stage,
+				    unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_BOOT ||
+	    stage == RISCV_ALTERNATIVES_MODULE)
+		return true;
+
+	return false;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
 
+	if (errata_probe_write_once(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
new file mode 100644
index 000000000000..4c407c482ed0
--- /dev/null
+++ b/arch/riscv/include/asm/rwonce.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+#include <asm/vendorid_list.h>
+
+#if defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE)
+#define write_once_flush()				\
+do {							\
+	asm volatile(ALTERNATIVE(			\
+		__nops(1),				\
+		"fence w, o\n\t",			\
+		THEAD_VENDOR_ID,			\
+		ERRATA_THEAD_WRITE_ONCE,		\
+		CONFIG_ERRATA_THEAD_WRITE_ONCE)		\
+		: : : "memory");			\
+} while (0)
+
+#define __WRITE_ONCE(x, val)				\
+do {							\
+	*(volatile typeof(x) *)&(x) = (val);		\
+	write_once_flush();				\
+} while (0)
+#endif
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_RWONCE_H */
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index c503373193d2..5df1862bf0c9 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -24,7 +24,8 @@
 #define	ERRATA_THEAD_PBMT 0
 #define	ERRATA_THEAD_CMO 1
 #define	ERRATA_THEAD_PMU 2
-#define	ERRATA_THEAD_NUMBER 3
+#define	ERRATA_THEAD_WRITE_ONCE 3
+#define	ERRATA_THEAD_NUMBER 4
 #endif
 
 #endif
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
index 8d0a6280e982..fb07fe8c6e45 100644
--- a/include/asm-generic/rwonce.h
+++ b/include/asm-generic/rwonce.h
@@ -50,10 +50,12 @@
 	__READ_ONCE(x);							\
 })
 
+#ifndef __WRITE_ONCE
 #define __WRITE_ONCE(x, val)						\
 do {									\
 	*(volatile typeof(x) *)&(x) = (val);				\
 } while (0)
+#endif
 
 #define WRITE_ONCE(x, val)						\
 do {									\
-- 
2.40.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2023-12-25 12:59 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-25 12:58 [PATCH V12 00/14] riscv: Add Native/Paravirt qspinlock support guoren
2023-12-25 12:58 ` guoren
2023-12-25 12:58 ` [PATCH V12 01/14] asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 02/14] asm-generic: ticket-lock: Add separate ticket-lock.h guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 03/14] riscv: errata: Move errata vendor func-id into vendorid_list.h guoren
2023-12-25 12:58   ` guoren
2024-01-04  4:00   ` Leonardo Bras
2024-01-04  4:00     ` Leonardo Bras
2023-12-25 12:58 ` guoren [this message]
2023-12-25 12:58   ` [PATCH V12 04/14] riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup guoren
2023-12-25 12:58 ` [PATCH V12 05/14] riscv: qspinlock: Add basic queued_spinlock support guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 06/14] riscv: qspinlock: Introduce combo spinlock guoren
2023-12-25 12:58   ` guoren
2024-01-04  4:56   ` Leonardo Bras
2024-01-04  4:56     ` Leonardo Bras
2023-12-25 12:58 ` [PATCH V12 07/14] riscv: qspinlock: Add virt_spin_lock() support for VM guest guoren
2023-12-25 12:58   ` guoren
2024-01-04  5:06   ` Leonardo Bras
2024-01-04  5:06     ` Leonardo Bras
2023-12-25 12:58 ` [PATCH V12 08/14] riscv: qspinlock: Force virt_spin_lock for KVM guests guoren
2023-12-25 12:58   ` guoren
2024-01-04  5:11   ` Leonardo Bras
2024-01-04  5:11     ` Leonardo Bras
2023-12-25 12:58 ` [PATCH V12 09/14] RISC-V: paravirt: Add pvqspinlock KVM backend guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 10/14] RISC-V: paravirt: Add pvqspinlock frontend skeleton guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 11/14] RISC-V: paravirt: pvqspinlock: Add SBI implementation guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 12/14] RISC-V: paravirt: pvqspinlock: Add nopvspin kernel parameter guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 13/14] RISC-V: paravirt: pvqspinlock: Add kconfig entry guoren
2023-12-25 12:58   ` guoren
2023-12-25 12:58 ` [PATCH V12 14/14] RISC-V: paravirt: pvqspinlock: Add trace point for pv_kick/wait guoren
2023-12-25 12:58   ` guoren
2023-12-26  0:35 ` [PATCH V12 00/14] riscv: Add Native/Paravirt qspinlock support Guo Ren
2023-12-26  0:35   ` Guo Ren

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231225125847.2778638-5-guoren@kernel.org \
    --to=guoren@kernel.org \
    --cc=anup@brainfault.org \
    --cc=atishp@atishpatra.org \
    --cc=bjorn@rivosinc.com \
    --cc=chao.wei@sophgo.com \
    --cc=conor.dooley@microchip.com \
    --cc=guoren@linux.alibaba.com \
    --cc=jszhang@kernel.org \
    --cc=keescook@chromium.org \
    --cc=kvm@vger.kernel.org \
    --cc=leobras@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=panqinglin2020@iscas.ac.cn \
    --cc=paul.walmsley@sifive.com \
    --cc=peterz@infradead.org \
    --cc=unicorn_wang@outlook.com \
    --cc=uwu@icenowy.me \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=wefu@redhat.com \
    --cc=wuwei2016@iscas.ac.cn \
    --cc=xiaoguang.xing@sophgo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.