linux-parisc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][RFC] parisc: Use ldcw,co on uniprocessor machines only
@ 2019-12-11 20:16 Helge Deller
  2019-12-11 21:29 ` John David Anglin
  0 siblings, 1 reply; 3+ messages in thread
From: Helge Deller @ 2019-12-11 20:16 UTC (permalink / raw)
  To: linux-parisc, James Bottomley, John David Anglin

The only atomic operation on parisc is the ldcw instruction, which loads
a 32-bit word from an address and replaces it with zero (load and clear
word). This instruction is used to implement kernel-internal spinlocks.

Up to now we tried to optimize ldcw usage by using the coherent (",co")
completer of this instruction, which operates on the cache (instead of
memory) and thus might speed things up. It was enabled by default on
our 64-bit kernel builds.

But we still see runtime locking problems, so this patch switches back
to the plain ldcw instruction for both 32- and 64-bit kernels, and
live-patches it at runtime to ldcw,co (the coherent completer) only
when running on a uniprocessor machine.

Signed-off-by: Helge Deller <deller@gmx.de>

diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h
index 0ec54f43d6d2..2667ec07acb9 100644
--- a/arch/parisc/include/asm/alternative.h
+++ b/arch/parisc/include/asm/alternative.h
@@ -11,6 +11,7 @@
 #define ALT_COND_RUN_ON_QEMU	0x20	/* if running on QEMU */

 #define INSN_PxTLB	0x02		/* modify pdtlb, pitlb */
+#define INSN_LDCW_CO	0x03		/* change cc in ldcw to ldcw,co */
 #define INSN_NOP	0x08000240	/* nop */

 #ifndef __ASSEMBLY__
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index a39250cb7dfc..8d6e76279d80 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -44,8 +44,9 @@

 #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)

+#define LDCW		ALTERNATIVE(., .+4, ALT_COND_NO_SMP, INSN_LDCW_CO) ! ldcw
+
 #ifdef CONFIG_PA20
-#define LDCW		ldcw,co
 #define BL		b,l
 # ifdef CONFIG_64BIT
 #  define PA_ASM_LEVEL	2.0w
@@ -53,7 +54,6 @@
 #  define PA_ASM_LEVEL	2.0
 # endif
 #else
-#define LDCW		ldcw
 #define BL		bl
 #define PA_ASM_LEVEL	1.1
 #endif
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index e080143e79a3..59130b0dbc3e 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -2,7 +2,8 @@
 #ifndef __PARISC_LDCW_H
 #define __PARISC_LDCW_H

-#ifndef CONFIG_PA20
+#include <asm/alternative.h>
+
 /* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
    and GCC only guarantees 8-byte alignment for stack locals, we can't
    be assured of 16-byte alignment for atomic lock data even if we
@@ -19,22 +20,6 @@
 		& ~(__PA_LDCW_ALIGNMENT - 1);			\
 	(volatile unsigned int *) __ret;			\
 })
-#define __LDCW	"ldcw"
-
-#else /*CONFIG_PA20*/
-/* From: "Jim Hull" <jim.hull of hp.com>
-   I've attached a summary of the change, but basically, for PA 2.0, as
-   long as the ",CO" (coherent operation) completer is specified, then the
-   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
-   they only require "natural" alignment (4-byte for ldcw, 8-byte for
-   ldcd). */
-
-#define __PA_LDCW_ALIGNMENT	4
-#define __PA_LDCW_ALIGN_ORDER	2
-#define __ldcw_align(a) (&(a)->slock)
-#define __LDCW	"ldcw,co"
-
-#endif /*!CONFIG_PA20*/

 /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
    We don't explicitly expose that "*a" may be written as reload
@@ -46,7 +31,8 @@
    usually used within code blocks surrounded by memory barriers.  */
 #define __ldcw(a) ({						\
 	unsigned __ret;						\
-	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
+	__asm__ __volatile__("ldcw 0(%1),%0"			\
+		ALTERNATIVE(ALT_COND_NO_SMP, INSN_LDCW_CO)	\
 		: "=r" (__ret) : "r" (a) : "memory");		\
 	__ret;							\
 })
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index 42979c5704dc..82d2384c3f22 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -3,13 +3,8 @@
 #define __ASM_SPINLOCK_TYPES_H

 typedef struct {
-#ifdef CONFIG_PA20
-	volatile unsigned int slock;
-# define __ARCH_SPIN_LOCK_UNLOCKED { 1 }
-#else
 	volatile unsigned int lock[4];
 # define __ARCH_SPIN_LOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
-#endif
 } arch_spinlock_t;

 typedef struct {
diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c
index 3c66d5c4d90d..cf83a801cc2a 100644
--- a/arch/parisc/kernel/alternative.c
+++ b/arch/parisc/kernel/alternative.c
@@ -69,6 +69,12 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 			if (boot_cpu_data.cpu_type >= pcxu) /* >= pa2.0 ? */
 				replacement |= (1 << 10); /* set el bit */
 		}
+		/* Want to replace ldcw by a ldcw,co instruction? */
+		if (replacement == INSN_LDCW_CO) {
+			replacement = *from;
+			/* set cache-coherent completer bits: */
+			replacement |= (0x01 << 10);
+		}

 		/*
 		 * Replace instruction with NOPs?

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-12-12  9:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-11 20:16 [PATCH][RFC] parisc: Use ldcw,co on uniprocessor machines only Helge Deller
2019-12-11 21:29 ` John David Anglin
2019-12-12  9:01   ` Helge Deller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).