linux-mips.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements
@ 2017-03-29  8:24 Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 1/8] MIPS: Loongson: Add Loongson-3A R3 basic support Huacai Chen
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

This patchset is is prepared for the next 4.12 release for Linux/MIPS.
It adds Loongson-3A R3 and Loongson's NMI handler support, adds a
"model name" knob in /proc/cpuinfo which is needed by some userspace
tools, improves I/O performance by IRQ balancing and IRQ affinity
setting, fixes indexed scache flushing for Loongson-3, and introduces
LOONGSON_LLSC_WAR to improve stability.

V1 -> V2:
1, Add Loongson-3A R3 basic support.
2, Sync the code to upstream.

V2 -> V3:
1, Add r4k_blast_scache_node for Loongson-3.
2, Update the last patch to avoid miscompilation.
3, Sync the code to upstream.

Huacai Chen(8):
 MIPS: Loongson: Add Loongson-3A R3 basic support.
 MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3.
 MIPS: Loongson: Add NMI handler support.
 MIPS: Loongson-3: IRQ balancing for PCI devices.
 MIPS: Loongson-3: support irq_set_affinity() in i8259 chip.
 MIPS: Loogson: Make enum loongson_cpu_type more clear.
 MIPS: Add __cpu_full_name[] to make CPU names more human-readable.
 MIPS: Loongson: Introduce and use LOONGSON_LLSC_WAR.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/atomic.h                     | 107 ++++++++
 arch/mips/include/asm/bitops.h                     | 273 ++++++++++++++++-----
 arch/mips/include/asm/cmpxchg.h                    |  54 ++++
 arch/mips/include/asm/cpu-info.h                   |   2 +
 arch/mips/include/asm/cpu.h                        |   1 +
 arch/mips/include/asm/edac.h                       |  33 ++-
 arch/mips/include/asm/futex.h                      |  62 +++++
 arch/mips/include/asm/irq.h                        |   3 +
 arch/mips/include/asm/local.h                      |  34 +++
 arch/mips/include/asm/mach-cavium-octeon/war.h     |   1 +
 arch/mips/include/asm/mach-generic/war.h           |   1 +
 arch/mips/include/asm/mach-ip22/war.h              |   1 +
 arch/mips/include/asm/mach-ip27/war.h              |   1 +
 arch/mips/include/asm/mach-ip28/war.h              |   1 +
 arch/mips/include/asm/mach-ip32/war.h              |   1 +
 arch/mips/include/asm/mach-loongson64/boot_param.h |  22 +-
 arch/mips/include/asm/mach-loongson64/war.h        |  26 ++
 arch/mips/include/asm/mach-malta/war.h             |   1 +
 arch/mips/include/asm/mach-pmcs-msp71xx/war.h      |   1 +
 arch/mips/include/asm/mach-rc32434/war.h           |   1 +
 arch/mips/include/asm/mach-rm/war.h                |   1 +
 arch/mips/include/asm/mach-sibyte/war.h            |   1 +
 arch/mips/include/asm/mach-tx49xx/war.h            |   1 +
 arch/mips/include/asm/pgtable.h                    |  19 ++
 arch/mips/include/asm/r4kcache.h                   |  26 ++
 arch/mips/include/asm/spinlock.h                   | 142 +++++++++++
 arch/mips/include/asm/war.h                        |   8 +
 arch/mips/kernel/cpu-probe.c                       |  19 ++
 arch/mips/kernel/proc.c                            |   4 +
 arch/mips/kernel/syscall.c                         |  34 +++
 arch/mips/loongson64/Platform                      |   3 +
 arch/mips/loongson64/common/env.c                  |  12 +-
 arch/mips/loongson64/common/init.c                 |  13 +
 arch/mips/loongson64/loongson-3/irq.c              |  53 +++-
 arch/mips/loongson64/loongson-3/smp.c              |  23 +-
 arch/mips/mm/c-r4k.c                               |  33 ++-
 arch/mips/mm/tlbex.c                               |  17 ++
 drivers/irqchip/irq-i8259.c                        |   3 +
 drivers/platform/mips/cpu_hwmon.c                  |  17 +-
 39 files changed, 970 insertions(+), 85 deletions(-)
 create mode 100644 arch/mips/include/asm/mach-loongson64/war.h
--
2.7.0

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V3 1/8] MIPS: Loongson: Add Loongson-3A R3 basic support
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 2/8] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

Loongson-3A R3 is very similar to Loongson-3A R2.

All Loongson-3 CPU family:

Code-name       Brand-name       PRId
Loongson-3A R1  Loongson-3A1000  0x6305
Loongson-3A R2  Loongson-3A2000  0x6308
Loongson-3A R3  Loongson-3A3000  0x6309
Loongson-3B R1  Loongson-3B1000  0x6306
Loongson-3B R2  Loongson-3B1500  0x6307

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/cpu.h           |  1 +
 arch/mips/kernel/cpu-probe.c          |  6 ++++++
 arch/mips/loongson64/common/env.c     |  1 +
 arch/mips/loongson64/loongson-3/smp.c |  5 +++--
 drivers/platform/mips/cpu_hwmon.c     | 17 +++++++++++++----
 5 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 9a83724..255ead7 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -247,6 +247,7 @@
 #define PRID_REV_LOONGSON3B_R1	0x0006
 #define PRID_REV_LOONGSON3B_R2	0x0007
 #define PRID_REV_LOONGSON3A_R2	0x0008
+#define PRID_REV_LOONGSON3A_R3	0x0009
 
 /*
  * Older processors used to encode processor version and revision in two
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index e57e685..1c94d4d 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1822,6 +1822,12 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R2);
 			break;
+		case PRID_REV_LOONGSON3A_R3:
+			c->cputype = CPU_LOONGSON3;
+			__cpu_name[cpu] = "ICT Loongson-3";
+			set_elf_platform(cpu, "loongson3a");
+			set_isa(c, MIPS_CPU_ISA_M64R2);
+			break;
 		}
 
 		decode_configs(c);
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index 6afa218..4707abf 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -193,6 +193,7 @@ void __init prom_init_env(void)
 			break;
 		case PRID_REV_LOONGSON3A_R1:
 		case PRID_REV_LOONGSON3A_R2:
+		case PRID_REV_LOONGSON3A_R3:
 			cpu_clock_freq = 900000000;
 			break;
 		case PRID_REV_LOONGSON3B_R1:
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 64659fc..1629743 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -503,7 +503,7 @@ static void loongson3a_r1_play_dead(int *state_addr)
 		: "a1");
 }
 
-static void loongson3a_r2_play_dead(int *state_addr)
+static void loongson3a_r2r3_play_dead(int *state_addr)
 {
 	register int val;
 	register long cpuid, core, node, count;
@@ -664,8 +664,9 @@ void play_dead(void)
 			(void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead);
 		break;
 	case PRID_REV_LOONGSON3A_R2:
+	case PRID_REV_LOONGSON3A_R3:
 		play_dead_at_ckseg1 =
-			(void *)CKSEG1ADDR((unsigned long)loongson3a_r2_play_dead);
+			(void *)CKSEG1ADDR((unsigned long)loongson3a_r2r3_play_dead);
 		break;
 	case PRID_REV_LOONGSON3B_R1:
 	case PRID_REV_LOONGSON3B_R2:
diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c
index 4300a55..46ab7d86 100644
--- a/drivers/platform/mips/cpu_hwmon.c
+++ b/drivers/platform/mips/cpu_hwmon.c
@@ -17,14 +17,23 @@
  */
 int loongson3_cpu_temp(int cpu)
 {
-	u32 reg;
+	u32 reg, prid_rev;
 
 	reg = LOONGSON_CHIPTEMP(cpu);
-	if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1)
+	prid_rev = read_c0_prid() & PRID_REV_MASK;
+	switch (prid_rev) {
+	case PRID_REV_LOONGSON3A_R1:
 		reg = (reg >> 8) & 0xff;
-	else
+		break;
+	case PRID_REV_LOONGSON3A_R2:
+	case PRID_REV_LOONGSON3B_R1:
+	case PRID_REV_LOONGSON3B_R2:
 		reg = ((reg >> 8) & 0xff) - 100;
-
+		break;
+	case PRID_REV_LOONGSON3A_R3:
+		reg = (reg & 0xffff)*731/0x4000 - 273;
+		break;
+	}
 	return (int)reg * 1000;
 }
 
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 2/8] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 1/8] MIPS: Loongson: Add Loongson-3A R3 basic support Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 3/8] MIPS: Loongson: Add NMI handler support Huacai Chen
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen, stable

For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
only flush Node-0's scache. So we add r4k_blast_scache_node() by using
(CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
 arch/mips/mm/c-r4k.c             | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 55fd94e..000a184 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
 __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
 __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
 
+#ifdef CONFIG_CPU_LOONGSON3
+#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize)	\
+static inline void blast_##pfx##cache##lsize##_node(long node)		\
+{									\
+	unsigned long start = CAC_BASE | (node << 44);			\
+	unsigned long end = start + current_cpu_data.desc.waysize;	\
+	unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit;	\
+	unsigned long ws_end = current_cpu_data.desc.ways <<		\
+			       current_cpu_data.desc.waybit;		\
+	unsigned long ws, addr;						\
+									\
+	__##pfx##flush_prologue						\
+									\
+	for (ws = 0; ws < ws_end; ws += ws_inc)				\
+		for (addr = start; addr < end; addr += lsize * 32)	\
+			cache##lsize##_unroll32(addr|ws, indexop);	\
+									\
+	__##pfx##flush_epilogue						\
+}
+
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
+#endif
+
 #endif /* _ASM_R4KCACHE_H */
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 3fe99cb..0a49af0 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
 		r4k_blast_scache = blast_scache128;
 }
 
+static void (* r4k_blast_scache_node)(long node);
+
+static void r4k_blast_scache_node_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	r4k_blast_scache_node = (void *)cache_noop;
+#ifdef CONFIG_CPU_LOONGSON3
+	if (sc_lsize == 16)
+		r4k_blast_scache_node = blast_scache16_node;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_node = blast_scache32_node;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_node = blast_scache64_node;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_node = blast_scache128_node;
+#endif
+}
+
 static inline void local_r4k___flush_cache_all(void * args)
 {
 	switch (current_cpu_type()) {
 	case CPU_LOONGSON2:
-	case CPU_LOONGSON3:
 	case CPU_R4000SC:
 	case CPU_R4000MC:
 	case CPU_R4400SC:
@@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
 		r4k_blast_scache();
 		break;
 
+	case CPU_LOONGSON3:
+		r4k_blast_scache_node(get_ebase_cpunum() >> 2);
+		break;
+
 	case CPU_BMIPS5000:
 		r4k_blast_scache();
 		__sync();
@@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size)
+#ifndef CONFIG_CPU_LOONGSON3
 			r4k_blast_scache();
+#else
+			r4k_blast_scache_node((addr >> 44) & 0xF);
+#endif
 		else
 			blast_scache_range(addr, addr + size);
 		preempt_enable();
@@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size)
+#ifndef CONFIG_CPU_LOONGSON3
 			r4k_blast_scache();
+#else
+			r4k_blast_scache_node((addr >> 44) & 0xF);
+#endif
 		else {
 			/*
 			 * There is no clearly documented alignment requirement
@@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
 	r4k_blast_scache_page_setup();
 	r4k_blast_scache_page_indexed_setup();
 	r4k_blast_scache_setup();
+	r4k_blast_scache_node_setup();
 #ifdef CONFIG_EVA
 	r4k_blast_dcache_user_page_setup();
 	r4k_blast_icache_user_page_setup();
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 3/8] MIPS: Loongson: Add NMI handler support
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 1/8] MIPS: Loongson: Add Loongson-3A R3 basic support Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 2/8] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 4/8] MIPS: Loongson-3: IRQ balancing for PCI devices Huacai Chen
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/loongson64/common/init.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c
index 9b987fe..6ef1712 100644
--- a/arch/mips/loongson64/common/init.c
+++ b/arch/mips/loongson64/common/init.c
@@ -10,13 +10,25 @@
 
 #include <linux/bootmem.h>
 #include <asm/bootinfo.h>
+#include <asm/traps.h>
 #include <asm/smp-ops.h>
+#include <asm/cacheflush.h>
 
 #include <loongson.h>
 
 /* Loongson CPU address windows config space base address */
 unsigned long __maybe_unused _loongson_addrwincfg_base;
 
+static void __init mips_nmi_setup(void)
+{
+	void *base;
+	extern char except_vec_nmi;
+
+	base = (void *)(CAC_BASE + 0x380);
+	memcpy(base, &except_vec_nmi, 0x80);
+	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
+}
+
 void __init prom_init(void)
 {
 #ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
@@ -40,6 +52,7 @@ void __init prom_init(void)
 	/*init the uart base address */
 	prom_init_uart_base();
 	register_smp_ops(&loongson3_smp_ops);
+	board_nmi_handler_setup = mips_nmi_setup;
 }
 
 void __init prom_free_prom_memory(void)
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 4/8] MIPS: Loongson-3: IRQ balancing for PCI devices
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
                   ` (2 preceding siblings ...)
  2017-03-29  8:24 ` [PATCH V3 3/8] MIPS: Loongson: Add NMI handler support Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 5/8] MIPS: Loongson-3: support irq_set_affinity() in i8259 chip Huacai Chen
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

IRQ0 (HPET), IRQ1 (Keyboard), IRQ2 (Cascade), IRQ7 (SCI), IRQ8 (RTC)
and IRQ12 (Mouse) are handled by core-0 locally. Other PCI IRQs (3, 4,
5, 6, 14, 15) are balanced by all cores from Node-0. This can improve
I/O performance significantly.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/loongson64/loongson-3/irq.c | 19 +++++++++++++++++--
 arch/mips/loongson64/loongson-3/smp.c | 18 +++++++++++++++++-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c
index 548f759..2e6e205 100644
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/loongson-3/irq.c
@@ -9,17 +9,32 @@
 
 #include "smp.h"
 
+extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
 unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
+unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
 
 static void ht_irqdispatch(void)
 {
-	unsigned int i, irq;
+	unsigned int i, irq, irq0, irq1;
+	static unsigned int dest_cpu = 0;
 
 	irq = LOONGSON_HT1_INT_VECTOR(0);
 	LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
 
+	irq0 = irq & local_irq;  /* handled by local core */
+	irq1 = irq & ~local_irq; /* balanced by other cores */
+
+	if (dest_cpu == 0 || !cpu_online(dest_cpu))
+		irq0 |= irq1;
+	else
+		loongson3_send_irq_by_ipi(dest_cpu, irq1);
+
+	dest_cpu = dest_cpu + 1;
+	if (dest_cpu >= num_possible_cpus() || cpu_data[dest_cpu].package > 0)
+		dest_cpu = 0;
+
 	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
-		if (irq & (0x1 << ht_irq[i]))
+		if (irq0 & (0x1 << ht_irq[i]))
 			do_IRQ(ht_irq[i]);
 	}
 }
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 1629743..b7a355c 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -254,13 +254,21 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 		loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]);
 }
 
+#define IPI_IRQ_OFFSET 6
+
+void loongson3_send_irq_by_ipi(int cpu, int irqs)
+{
+	loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]);
+}
+
 void loongson3_ipi_interrupt(struct pt_regs *regs)
 {
 	int i, cpu = smp_processor_id();
-	unsigned int action, c0count;
+	unsigned int action, c0count, irqs;
 
 	/* Load the ipi register to figure out what we're supposed to do */
 	action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+	irqs = action >> IPI_IRQ_OFFSET;
 
 	/* Clear the ipi register to clear the interrupt */
 	loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]);
@@ -282,6 +290,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
 			core0_c0count[i] = c0count;
 		__wbflush(); /* Let others see the result ASAP */
 	}
+
+	if (irqs) {
+		int irq;
+		while ((irq = ffs(irqs))) {
+			do_IRQ(irq-1);
+			irqs &= ~(1<<(irq-1));
+		}
+	}
 }
 
 #define MAX_LOOPS 800
-- 
2.7.0


.

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 5/8] MIPS: Loongson-3: support irq_set_affinity() in i8259 chip
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
                   ` (3 preceding siblings ...)
  2017-03-29  8:24 ` [PATCH V3 4/8] MIPS: Loongson-3: IRQ balancing for PCI devices Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 6/8] MIPS: Loogson: Make enum loongson_cpu_type more clear Huacai Chen
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

With this patch we can set irq affinity via procfs, so as to improve
network performance.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/irq.h           |  3 ++
 arch/mips/loongson64/loongson-3/irq.c | 62 +++++++++++++++++++++++++++--------
 drivers/irqchip/irq-i8259.c           |  3 ++
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index ddd1c91..47ff7c6 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -53,6 +53,7 @@ static inline int irq_canonicalize(int irq)
 #define irq_canonicalize(irq) (irq)	/* Sane hardware, sane code ... */
 #endif
 
+struct irq_data;
 asmlinkage void plat_irq_dispatch(void);
 
 extern void do_IRQ(unsigned int irq);
@@ -63,6 +64,8 @@ extern void spurious_interrupt(void);
 extern int allocate_irqno(void);
 extern void alloc_legacy_irqno(void);
 extern void free_irqno(unsigned int irq);
+extern int plat_set_irq_affinity(struct irq_data *d,
+				 const struct cpumask *affinity, bool force);
 
 /*
  * Before R2 the timer and performance counter interrupts were both fixed to
diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c
index 2e6e205..e8b7a47 100644
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/loongson-3/irq.c
@@ -10,32 +10,68 @@
 #include "smp.h"
 
 extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
+
+unsigned int irq_cpu[16] = {[0 ... 15] = -1};
 unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
 unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
 
+int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
+			  bool force)
+{
+	unsigned int cpu;
+	struct cpumask new_affinity;
+
+	/* I/O devices are connected on package-0 */
+	cpumask_copy(&new_affinity, affinity);
+	for_each_cpu(cpu, affinity)
+		if (cpu_data[cpu].package > 0)
+			cpumask_clear_cpu(cpu, &new_affinity);
+
+	if (cpumask_empty(&new_affinity))
+		return -EINVAL;
+
+	cpumask_copy(d->common->affinity, &new_affinity);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
 static void ht_irqdispatch(void)
 {
-	unsigned int i, irq, irq0, irq1;
-	static unsigned int dest_cpu = 0;
+	unsigned int i, irq;
+	struct irq_data *irqd;
+	struct cpumask affinity;
 
 	irq = LOONGSON_HT1_INT_VECTOR(0);
 	LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
 
-	irq0 = irq & local_irq;  /* handled by local core */
-	irq1 = irq & ~local_irq; /* balanced by other cores */
+	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
+		if (!(irq & (0x1 << ht_irq[i])))
+			continue;
 
-	if (dest_cpu == 0 || !cpu_online(dest_cpu))
-		irq0 |= irq1;
-	else
-		loongson3_send_irq_by_ipi(dest_cpu, irq1);
+		/* handled by local core */
+		if (local_irq & (0x1 << ht_irq[i])) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
 
-	dest_cpu = dest_cpu + 1;
-	if (dest_cpu >= num_possible_cpus() || cpu_data[dest_cpu].package > 0)
-		dest_cpu = 0;
+		irqd = irq_get_irq_data(ht_irq[i]);
+		cpumask_and(&affinity, irqd->common->affinity, cpu_active_mask);
+		if (cpumask_empty(&affinity)) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
 
-	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
-		if (irq0 & (0x1 << ht_irq[i]))
+		irq_cpu[ht_irq[i]] = cpumask_next(irq_cpu[ht_irq[i]], &affinity);
+		if (irq_cpu[ht_irq[i]] >= nr_cpu_ids)
+			irq_cpu[ht_irq[i]] = cpumask_first(&affinity);
+
+		if (irq_cpu[ht_irq[i]] == 0) {
 			do_IRQ(ht_irq[i]);
+			continue;
+		}
+
+		/* balanced by other cores */
+		loongson3_send_irq_by_ipi(irq_cpu[ht_irq[i]], (0x1 << ht_irq[i]));
 	}
 }
 
diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c
index 1aec12c..95d21e3 100644
--- a/drivers/irqchip/irq-i8259.c
+++ b/drivers/irqchip/irq-i8259.c
@@ -46,6 +46,9 @@ static struct irq_chip i8259A_chip = {
 	.irq_disable		= disable_8259A_irq,
 	.irq_unmask		= enable_8259A_irq,
 	.irq_mask_ack		= mask_and_ack_8259A,
+#ifdef CONFIG_CPU_LOONGSON3
+	.irq_set_affinity	= plat_set_irq_affinity,
+#endif
 };
 
 /*
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 6/8] MIPS: Loogson: Make enum loongson_cpu_type more clear
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
                   ` (4 preceding siblings ...)
  2017-03-29  8:24 ` [PATCH V3 5/8] MIPS: Loongson-3: support irq_set_affinity() in i8259 chip Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 7/8] MIPS: Add __cpu_full_name[] to make CPU names more human-readable Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 8/8] MIPS: Loongson: Introduce and use LOONGSON_LLSC_WAR Huacai Chen
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

Sort enum loongson_cpu_type in a more reasonable manner, this makes the
CPU names more clear and extensible. Those already defined enum values
are renamed to Legacy_* for compatibility.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/mach-loongson64/boot_param.h | 22 ++++++++++++++++------
 arch/mips/loongson64/common/env.c                  | 11 ++++++++---
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index d3f3258..9f9bb9c 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -27,12 +27,22 @@ struct efi_memory_map_loongson {
 } __packed;
 
 enum loongson_cpu_type {
-	Loongson_2E = 0,
-	Loongson_2F = 1,
-	Loongson_3A = 2,
-	Loongson_3B = 3,
-	Loongson_1A = 4,
-	Loongson_1B = 5
+	Legacy_2E = 0x0,
+	Legacy_2F = 0x1,
+	Legacy_3A = 0x2,
+	Legacy_3B = 0x3,
+	Legacy_1A = 0x4,
+	Legacy_1B = 0x5,
+	Legacy_2G = 0x6,
+	Legacy_2H = 0x7,
+	Loongson_1A = 0x100,
+	Loongson_1B = 0x101,
+	Loongson_2E = 0x200,
+	Loongson_2F = 0x201,
+	Loongson_2G = 0x202,
+	Loongson_2H = 0x203,
+	Loongson_3A = 0x300,
+	Loongson_3B = 0x301
 };
 
 /*
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index 4707abf..1e8a955 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -90,7 +90,9 @@ void __init prom_init_env(void)
 
 	cpu_clock_freq = ecpu->cpu_clock_freq;
 	loongson_sysconf.cputype = ecpu->cputype;
-	if (ecpu->cputype == Loongson_3A) {
+	switch (ecpu->cputype) {
+	case Legacy_3A:
+	case Loongson_3A:
 		loongson_sysconf.cores_per_node = 4;
 		loongson_sysconf.cores_per_package = 4;
 		smp_group[0] = 0x900000003ff01000;
@@ -111,7 +113,9 @@ void __init prom_init_env(void)
 		loongson_freqctrl[3] = 0x900030001fe001d0;
 		loongson_sysconf.ht_control_base = 0x90000EFDFB000000;
 		loongson_sysconf.workarounds = WORKAROUND_CPUFREQ;
-	} else if (ecpu->cputype == Loongson_3B) {
+		break;
+	case Legacy_3B:
+	case Loongson_3B:
 		loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */
 		loongson_sysconf.cores_per_package = 8;
 		smp_group[0] = 0x900000003ff01000;
@@ -132,7 +136,8 @@ void __init prom_init_env(void)
 		loongson_freqctrl[3] = 0x900060001fe001d0;
 		loongson_sysconf.ht_control_base = 0x90001EFDFB000000;
 		loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG;
-	} else {
+		break;
+	default:
 		loongson_sysconf.cores_per_node = 1;
 		loongson_sysconf.cores_per_package = 1;
 		loongson_chipcfg[0] = 0x900000001fe00180;
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 7/8] MIPS: Add __cpu_full_name[] to make CPU names more human-readable
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
                   ` (5 preceding siblings ...)
  2017-03-29  8:24 ` [PATCH V3 6/8] MIPS: Loogson: Make enum loongson_cpu_type more clear Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  2017-03-29  8:24 ` [PATCH V3 8/8] MIPS: Loongson: Introduce and use LOONGSON_LLSC_WAR Huacai Chen
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

In /proc/cpuinfo, we keep "cpu model" as is, since GCC should use it
for -march=native. Besides, we add __cpu_full_name[] to describe the
processor in a more human-readable manner. The full name is displayed
as "model name" in cpuinfo, which is needed by some userspace tools
such as gnome-system-monitor.

This is only used by Loongson now.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/cpu-info.h |  2 ++
 arch/mips/kernel/cpu-probe.c     | 13 +++++++++++++
 arch/mips/kernel/proc.c          |  4 ++++
 3 files changed, 19 insertions(+)

diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index d43b304..af72f41 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -119,7 +119,9 @@ extern void cpu_probe(void);
 extern void cpu_report(void);
 
 extern const char *__cpu_name[];
+extern const char *__cpu_full_name[];
 #define cpu_name_string()	__cpu_name[raw_smp_processor_id()]
+#define cpu_full_name_string()	__cpu_full_name[raw_smp_processor_id()]
 
 struct seq_file;
 struct notifier_block;
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 1c94d4d..8434a0e 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1468,6 +1468,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			set_elf_platform(cpu, "loongson2e");
 			set_isa(c, MIPS_CPU_ISA_III);
 			c->fpu_msk31 |= FPU_CSR_CONDX;
+			__cpu_full_name[cpu] = "ICT Loongson-2E";
 			break;
 		case PRID_REV_LOONGSON2F:
 			c->cputype = CPU_LOONGSON2;
@@ -1475,19 +1476,28 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			set_elf_platform(cpu, "loongson2f");
 			set_isa(c, MIPS_CPU_ISA_III);
 			c->fpu_msk31 |= FPU_CSR_CONDX;
+			__cpu_full_name[cpu] = "ICT Loongson-2F";
 			break;
 		case PRID_REV_LOONGSON3A_R1:
 			c->cputype = CPU_LOONGSON3;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R1);
+			__cpu_full_name[cpu] = "ICT Loongson-3A R1 (Loongson-3A1000)";
 			break;
 		case PRID_REV_LOONGSON3B_R1:
+			c->cputype = CPU_LOONGSON3;
+			__cpu_name[cpu] = "ICT Loongson-3";
+			set_elf_platform(cpu, "loongson3b");
+			set_isa(c, MIPS_CPU_ISA_M64R1);
+			__cpu_full_name[cpu] = "ICT Loongson-3B R1 (Loongson-3B1000)";
+			break;
 		case PRID_REV_LOONGSON3B_R2:
 			c->cputype = CPU_LOONGSON3;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3b");
 			set_isa(c, MIPS_CPU_ISA_M64R1);
+			__cpu_full_name[cpu] = "ICT Loongson-3B R2 (Loongson-3B1500)";
 			break;
 		}
 
@@ -1821,12 +1831,14 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R2);
+			__cpu_full_name[cpu] = "ICT Loongson-3A R2 (Loongson-3A2000)";
 			break;
 		case PRID_REV_LOONGSON3A_R3:
 			c->cputype = CPU_LOONGSON3;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R2);
+			__cpu_full_name[cpu] = "ICT Loongson-3A R3 (Loongson-3A3000)";
 			break;
 		}
 
@@ -1946,6 +1958,7 @@ EXPORT_SYMBOL(__ua_limit);
 #endif
 
 const char *__cpu_name[NR_CPUS];
+const char *__cpu_full_name[NR_CPUS];
 const char *__elf_platform;
 
 void cpu_probe(void)
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 4eff2ae..78db63a 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -14,6 +14,7 @@
 #include <asm/mipsregs.h>
 #include <asm/processor.h>
 #include <asm/prom.h>
+#include <asm/time.h>
 
 unsigned int vced_count, vcei_count;
 
@@ -62,6 +63,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_printf(m, fmt, __cpu_name[n],
 		      (version >> 4) & 0x0f, version & 0x0f,
 		      (fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
+	if (__cpu_full_name[n])
+		seq_printf(m, "model name\t\t: %s @ %uMHz\n",
+		      __cpu_full_name[n], mips_hpt_frequency / 500000);
 	seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
 		      cpu_data[n].udelay_val / (500000/HZ),
 		      (cpu_data[n].udelay_val / (5000/HZ)) % 100);
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH V3 8/8] MIPS: Loongson: Introduce and use LOONGSON_LLSC_WAR
  2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
                   ` (6 preceding siblings ...)
  2017-03-29  8:24 ` [PATCH V3 7/8] MIPS: Add __cpu_full_name[] to make CPU names more human-readable Huacai Chen
@ 2017-03-29  8:24 ` Huacai Chen
  7 siblings, 0 replies; 9+ messages in thread
From: Huacai Chen @ 2017-03-29  8:24 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen

On the Loongson-2G/2H/3A/3B there is a hardware flaw that ll/sc and
lld/scd is very weak ordering. We should add sync instructions before
each ll/lld and after the last sc/scd to workaround. Otherwise, this
flaw will cause deadlock occationally (e.g. when doing heavy load test
with LTP).

Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/atomic.h                 | 107 ++++++++++
 arch/mips/include/asm/bitops.h                 | 273 +++++++++++++++++++------
 arch/mips/include/asm/cmpxchg.h                |  54 +++++
 arch/mips/include/asm/edac.h                   |  33 ++-
 arch/mips/include/asm/futex.h                  |  62 ++++++
 arch/mips/include/asm/local.h                  |  34 +++
 arch/mips/include/asm/mach-cavium-octeon/war.h |   1 +
 arch/mips/include/asm/mach-generic/war.h       |   1 +
 arch/mips/include/asm/mach-ip22/war.h          |   1 +
 arch/mips/include/asm/mach-ip27/war.h          |   1 +
 arch/mips/include/asm/mach-ip28/war.h          |   1 +
 arch/mips/include/asm/mach-ip32/war.h          |   1 +
 arch/mips/include/asm/mach-loongson64/war.h    |  26 +++
 arch/mips/include/asm/mach-malta/war.h         |   1 +
 arch/mips/include/asm/mach-pmcs-msp71xx/war.h  |   1 +
 arch/mips/include/asm/mach-rc32434/war.h       |   1 +
 arch/mips/include/asm/mach-rm/war.h            |   1 +
 arch/mips/include/asm/mach-sibyte/war.h        |   1 +
 arch/mips/include/asm/mach-tx49xx/war.h        |   1 +
 arch/mips/include/asm/pgtable.h                |  19 ++
 arch/mips/include/asm/spinlock.h               | 142 +++++++++++++
 arch/mips/include/asm/war.h                    |   8 +
 arch/mips/kernel/syscall.c                     |  34 +++
 arch/mips/loongson64/Platform                  |   3 +
 arch/mips/mm/tlbex.c                           |  17 ++
 25 files changed, 757 insertions(+), 67 deletions(-)
 create mode 100644 arch/mips/include/asm/mach-loongson64/war.h

diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0ab176b..e0002c58 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -56,6 +56,22 @@ static __inline__ void atomic_##op(int i, atomic_t * v)			      \
 		"	.set	mips0					\n"   \
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
 		: "Ir" (i));						      \
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {		      \
+		int temp;						      \
+									      \
+		do {							      \
+			__asm__ __volatile__(				      \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
+			__WEAK_LLSC_MB					      \
+			"	ll	%0, %1		# atomic_" #op "\n"   \
+			"	" #asm_op " %0, %2			\n"   \
+			"	sc	%0, %1				\n"   \
+			"	.set	mips0				\n"   \
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
+			: "Ir" (i));					      \
+		} while (unlikely(!temp));				      \
+									      \
+		smp_llsc_mb();						      \
 	} else if (kernel_uses_llsc) {					      \
 		int temp;						      \
 									      \
@@ -97,6 +113,23 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v)	      \
 		: "=&r" (result), "=&r" (temp),				      \
 		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
 		: "Ir" (i));						      \
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {		      \
+		int temp;						      \
+									      \
+		do {							      \
+			__asm__ __volatile__(				      \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
+			__WEAK_LLSC_MB					      \
+			"	ll	%1, %2	# atomic_" #op "_return	\n"   \
+			"	" #asm_op " %0, %1, %3			\n"   \
+			"	sc	%0, %2				\n"   \
+			"	.set	mips0				\n"   \
+			: "=&r" (result), "=&r" (temp),			      \
+			  "+" GCC_OFF_SMALL_ASM() (v->counter)		      \
+			: "Ir" (i));					      \
+		} while (unlikely(!result));				      \
+									      \
+		result = temp; result c_op i;				      \
 	} else if (kernel_uses_llsc) {					      \
 		int temp;						      \
 									      \
@@ -237,6 +270,26 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 		  "+" GCC_OFF_SMALL_ASM() (v->counter)
 		: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		int temp;
+
+		__asm__ __volatile__(
+		"	.set	"MIPS_ISA_LEVEL"			\n"
+		"1:				# atomic_sub_if_positive\n"
+		__WEAK_LLSC_MB
+		"	ll	%1, %2					\n"
+		"	subu	%0, %1, %3				\n"
+		"	bltz	%0, 1f					\n"
+		"	sc	%0, %2					\n"
+		"	.set	noreorder				\n"
+		"	beqz	%0, 1b					\n"
+		"	 subu	%0, %1, %3				\n"
+		"	.set	reorder					\n"
+		"1:							\n"
+		"	.set	mips0					\n"
+		: "=&r" (result), "=&r" (temp),
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "Ir" (i));
 	} else if (kernel_uses_llsc) {
 		int temp;
 
@@ -398,6 +451,22 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)		      \
 		"	.set	mips0					\n"   \
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
 		: "Ir" (i));						      \
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {		      \
+		long temp;						      \
+									      \
+		do {							      \
+			__asm__ __volatile__(				      \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
+			__WEAK_LLSC_MB					      \
+			"	lld	%0, %1		# atomic64_" #op "\n" \
+			"	" #asm_op " %0, %2			\n"   \
+			"	scd	%0, %1				\n"   \
+			"	.set	mips0				\n"   \
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
+			: "Ir" (i));					      \
+		} while (unlikely(!temp));				      \
+									      \
+		smp_llsc_mb();						      \
 	} else if (kernel_uses_llsc) {					      \
 		long temp;						      \
 									      \
@@ -439,6 +508,24 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
 		: "=&r" (result), "=&r" (temp),				      \
 		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
 		: "Ir" (i));						      \
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {		      \
+		long temp;						      \
+									      \
+		do {							      \
+			__asm__ __volatile__(				      \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
+			__WEAK_LLSC_MB					      \
+			"	lld	%1, %2	# atomic64_" #op "_return\n"  \
+			"	" #asm_op " %0, %1, %3			\n"   \
+			"	scd	%0, %2				\n"   \
+			"	.set	mips0				\n"   \
+			: "=&r" (result), "=&r" (temp),			      \
+			  "=" GCC_OFF_SMALL_ASM() (v->counter)		      \
+			: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)	      \
+			: "memory");					      \
+		} while (unlikely(!result));				      \
+									      \
+		result = temp; result c_op i;				      \
 	} else if (kernel_uses_llsc) {					      \
 		long temp;						      \
 									      \
@@ -582,6 +669,26 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 		  "=" GCC_OFF_SMALL_ASM() (v->counter)
 		: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		long temp;
+
+		__asm__ __volatile__(
+		"	.set	"MIPS_ISA_LEVEL"			\n"
+		"1:				# atomic64_sub_if_positive\n"
+		__WEAK_LLSC_MB
+		"	lld	%1, %2					\n"
+		"	dsubu	%0, %1, %3				\n"
+		"	bltz	%0, 1f					\n"
+		"	scd	%0, %2					\n"
+		"	.set	noreorder				\n"
+		"	beqz	%0, 1b					\n"
+		"	 dsubu	%0, %1, %3				\n"
+		"	.set	reorder					\n"
+		"1:							\n"
+		"	.set	mips0					\n"
+		: "=&r" (result), "=&r" (temp),
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "Ir" (i));
 	} else if (kernel_uses_llsc) {
 		long temp;
 
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index fa57cef..6bef54a 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -68,26 +68,54 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
-		do {
-			__asm__ __volatile__(
-			"	" __LL "%0, %1		# set_bit	\n"
-			"	" __INS "%0, %3, %2, 1			\n"
-			"	" __SC "%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (bit), "r" (~0));
-		} while (unlikely(!temp));
+		if (LOONGSON_LLSC_WAR) {
+			do {
+				__asm__ __volatile__(
+				__WEAK_LLSC_MB
+				"	" __LL "%0, %1		# set_bit	\n"
+				"	" __INS "%0, %3, %2, 1			\n"
+				"	" __SC "%0, %1				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (bit), "r" (~0));
+			} while (unlikely(!temp));
+			smp_llsc_mb();
+		} else {
+			do {
+				__asm__ __volatile__(
+				"	" __LL "%0, %1		# set_bit	\n"
+				"	" __INS "%0, %3, %2, 1			\n"
+				"	" __SC "%0, %1				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (bit), "r" (~0));
+			} while (unlikely(!temp));
+		}
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
-		do {
-			__asm__ __volatile__(
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1		# set_bit	\n"
-			"	or	%0, %2				\n"
-			"	" __SC	"%0, %1				\n"
-			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (1UL << bit));
-		} while (unlikely(!temp));
+		if (LOONGSON_LLSC_WAR) {
+			do {
+				__asm__ __volatile__(
+				"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+				__WEAK_LLSC_MB
+				"	" __LL "%0, %1		# set_bit	\n"
+				"	or	%0, %2				\n"
+				"	" __SC	"%0, %1				\n"
+				"	.set	mips0				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (1UL << bit));
+			} while (unlikely(!temp));
+			smp_llsc_mb();
+		} else {
+			do {
+				__asm__ __volatile__(
+				"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+				"	" __LL "%0, %1		# set_bit	\n"
+				"	or	%0, %2				\n"
+				"	" __SC	"%0, %1				\n"
+				"	.set	mips0				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (1UL << bit));
+			} while (unlikely(!temp));
+		}
 	} else
 		__mips_set_bit(nr, addr);
 }
@@ -120,26 +148,54 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (~(1UL << bit)));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
-		do {
-			__asm__ __volatile__(
-			"	" __LL "%0, %1		# clear_bit	\n"
-			"	" __INS "%0, $0, %2, 1			\n"
-			"	" __SC "%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (bit));
-		} while (unlikely(!temp));
+		if (LOONGSON_LLSC_WAR) {
+			do {
+				__asm__ __volatile__(
+				__WEAK_LLSC_MB
+				"	" __LL "%0, %1		# clear_bit	\n"
+				"	" __INS "%0, $0, %2, 1			\n"
+				"	" __SC "%0, %1				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (bit));
+			} while (unlikely(!temp));
+			smp_llsc_mb();
+		} else {
+			do {
+				__asm__ __volatile__(
+				"	" __LL "%0, %1		# clear_bit	\n"
+				"	" __INS "%0, $0, %2, 1			\n"
+				"	" __SC "%0, %1				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (bit));
+			} while (unlikely(!temp));
+		}
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
-		do {
-			__asm__ __volatile__(
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1		# clear_bit	\n"
-			"	and	%0, %2				\n"
-			"	" __SC "%0, %1				\n"
-			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (~(1UL << bit)));
-		} while (unlikely(!temp));
+		if (LOONGSON_LLSC_WAR) {
+			do {
+				__asm__ __volatile__(
+				"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+				__WEAK_LLSC_MB
+				"	" __LL "%0, %1		# clear_bit	\n"
+				"	and	%0, %2				\n"
+				"	" __SC "%0, %1				\n"
+				"	.set	mips0				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (~(1UL << bit)));
+			} while (unlikely(!temp));
+			smp_llsc_mb();
+		} else {
+			do {
+				__asm__ __volatile__(
+				"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+				"	" __LL "%0, %1		# clear_bit	\n"
+				"	and	%0, %2				\n"
+				"	" __SC "%0, %1				\n"
+				"	.set	mips0				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+				: "ir" (~(1UL << bit)));
+			} while (unlikely(!temp));
+		}
 	} else
 		__mips_clear_bit(nr, addr);
 }
@@ -184,6 +240,23 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 		"	.set	mips0				\n"
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 		: "ir" (1UL << bit));
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long temp;
+
+		do {
+			__asm__ __volatile__(
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			__WEAK_LLSC_MB
+			"	" __LL "%0, %1		# change_bit	\n"
+			"	xor	%0, %2				\n"
+			"	" __SC	"%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
+			: "ir" (1UL << bit));
+		} while (unlikely(!temp));
+
+		smp_llsc_mb();
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
@@ -233,6 +306,24 @@ static inline int test_and_set_bit(unsigned long nr,
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 		: "r" (1UL << bit)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long temp;
+
+		do {
+			__asm__ __volatile__(
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			__WEAK_LLSC_MB
+			"	" __LL "%0, %1	# test_and_set_bit	\n"
+			"	or	%2, %0, %3			\n"
+			"	" __SC	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
@@ -287,6 +378,24 @@ static inline int test_and_set_bit_lock(unsigned long nr,
 		: "=&r" (temp), "+m" (*m), "=&r" (res)
 		: "r" (1UL << bit)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long temp;
+
+		do {
+			__asm__ __volatile__(
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			__WEAK_LLSC_MB
+			"	" __LL "%0, %1	# test_and_set_bit	\n"
+			"	or	%2, %0, %3			\n"
+			"	" __SC	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
@@ -348,33 +457,63 @@ static inline int test_and_clear_bit(unsigned long nr,
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		do {
-			__asm__ __volatile__(
-			"	" __LL	"%0, %1 # test_and_clear_bit	\n"
-			"	" __EXT "%2, %0, %3, 1			\n"
-			"	" __INS "%0, $0, %3, 1			\n"
-			"	" __SC	"%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "ir" (bit)
-			: "memory");
-		} while (unlikely(!temp));
+		if (LOONGSON_LLSC_WAR) {
+			do {
+				__asm__ __volatile__(
+				__WEAK_LLSC_MB
+				"	" __LL	"%0, %1 # test_and_clear_bit	\n"
+				"	" __EXT "%2, %0, %3, 1			\n"
+				"	" __INS "%0, $0, %3, 1			\n"
+				"	" __SC	"%0, %1				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+				: "ir" (bit)
+				: "memory");
+			} while (unlikely(!temp));
+		} else {
+			do {
+				__asm__ __volatile__(
+				"	" __LL	"%0, %1 # test_and_clear_bit	\n"
+				"	" __EXT "%2, %0, %3, 1			\n"
+				"	" __INS "%0, $0, %3, 1			\n"
+				"	" __SC	"%0, %1				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+				: "ir" (bit)
+				: "memory");
+			} while (unlikely(!temp));
+		}
 #endif
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		do {
-			__asm__ __volatile__(
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL	"%0, %1 # test_and_clear_bit	\n"
-			"	or	%2, %0, %3			\n"
-			"	xor	%2, %3				\n"
-			"	" __SC	"%2, %1				\n"
-			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "r" (1UL << bit)
-			: "memory");
-		} while (unlikely(!res));
+		if (LOONGSON_LLSC_WAR) {
+			do {
+				__asm__ __volatile__(
+				"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+				__WEAK_LLSC_MB
+				"	" __LL	"%0, %1 # test_and_clear_bit	\n"
+				"	or	%2, %0, %3			\n"
+				"	xor	%2, %3				\n"
+				"	" __SC	"%2, %1				\n"
+				"	.set	mips0				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+				: "r" (1UL << bit)
+				: "memory");
+			} while (unlikely(!res));
+		} else {
+			do {
+				__asm__ __volatile__(
+				"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+				"	" __LL	"%0, %1 # test_and_clear_bit	\n"
+				"	or	%2, %0, %3			\n"
+				"	xor	%2, %3				\n"
+				"	" __SC	"%2, %1				\n"
+				"	.set	mips0				\n"
+				: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+				: "r" (1UL << bit)
+				: "memory");
+			} while (unlikely(!res));
+		}
 
 		res = temp & (1UL << bit);
 	} else
@@ -416,6 +555,24 @@ static inline int test_and_change_bit(unsigned long nr,
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 		: "r" (1UL << bit)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long temp;
+
+		do {
+			__asm__ __volatile__(
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			__WEAK_LLSC_MB
+			"	" __LL	"%0, %1 # test_and_change_bit	\n"
+			"	xor	%2, %0, %3			\n"
+			"	" __SC	"\t%2, %1			\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index b71ab4a..5bfd70d 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -34,6 +34,24 @@ static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
 		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long dummy;
+
+		do {
+			__asm__ __volatile__(
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			__WEAK_LLSC_MB
+			"	ll	%0, %3		# xchg_u32	\n"
+			"	.set	mips0				\n"
+			"	move	%2, %z4				\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			"	sc	%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m),
+			  "=&r" (dummy)
+			: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
+			: "memory");
+		} while (unlikely(!dummy));
 	} else if (kernel_uses_llsc) {
 		unsigned long dummy;
 
@@ -85,6 +103,22 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
 		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long dummy;
+
+		do {
+			__asm__ __volatile__(
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			__WEAK_LLSC_MB
+			"	lld	%0, %3		# xchg_u64	\n"
+			"	move	%2, %z4				\n"
+			"	scd	%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m),
+			  "=&r" (dummy)
+			: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
+			: "memory");
+		} while (unlikely(!dummy));
 	} else if (kernel_uses_llsc) {
 		unsigned long dummy;
 
@@ -159,6 +193,26 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)		\
 		: "memory");						\
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {		\
+		__asm__ __volatile__(					\
+		"	.set	push				\n"	\
+		"	.set	noat				\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
+		"1:				# __cmpxchg_asm \n"	\
+		__WEAK_LLSC_MB						\
+		"	" ld "	%0, %2				\n"	\
+		"	bne	%0, %z3, 2f			\n"	\
+		"	.set	mips0				\n"	\
+		"	move	$1, %z4				\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
+		"	" st "	$1, %1				\n"	\
+		"	beqz	$1, 1b				\n"	\
+		"	.set	pop				\n"	\
+		"2:						\n"	\
+		__WEAK_LLSC_MB						\
+		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
+		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)		\
+		: "memory");						\
 	} else if (kernel_uses_llsc) {					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
index 980b165..a864aa9 100644
--- a/arch/mips/include/asm/edac.h
+++ b/arch/mips/include/asm/edac.h
@@ -19,15 +19,30 @@ static inline void edac_atomic_scrub(void *va, u32 size)
 		 * Intel: asm("lock; addl $0, %0"::"m"(*virt_addr));
 		 */
 
-		__asm__ __volatile__ (
-		"	.set	mips2					\n"
-		"1:	ll	%0, %1		# edac_atomic_scrub	\n"
-		"	addu	%0, $0					\n"
-		"	sc	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
-		"	.set	mips0					\n"
-		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
-		: GCC_OFF_SMALL_ASM() (*virt_addr));
+		if (LOONGSON_LLSC_WAR) {
+			__asm__ __volatile__ (
+			"	.set	mips2					\n"
+			"1:				# edac_atomic_scrub	\n"
+			__WEAK_LLSC_MB
+			"	ll	%0, %1					\n"
+			"	addu	%0, $0					\n"
+			"	sc	%0, %1					\n"
+			"	beqz	%0, 1b					\n"
+			"	.set	mips0					\n"
+			: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
+			: GCC_OFF_SMALL_ASM() (*virt_addr));
+			smp_llsc_mb();
+		} else {
+			__asm__ __volatile__ (
+			"	.set	mips2					\n"
+			"1:	ll	%0, %1		# edac_atomic_scrub	\n"
+			"	addu	%0, $0					\n"
+			"	sc	%0, %1					\n"
+			"	beqz	%0, 1b					\n"
+			"	.set	mips0					\n"
+			: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
+			: GCC_OFF_SMALL_ASM() (*virt_addr));
+		}
 
 		virt_addr++;
 	}
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 1de190b..3e2741f 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -49,6 +49,37 @@
 		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
 		  "i" (-EFAULT)						\
 		: "memory");						\
+	} else if (cpu_has_llsc && LOONGSON_LLSC_WAR) {					\
+		__asm__ __volatile__(					\
+		"	.set	push				\n"	\
+		"	.set	noat				\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
+		"1:				 # __futex_atomic_op\n"	\
+		__WEAK_LLSC_MB						\
+		"	"user_ll("%1", "%4")"			\n"	\
+		"	.set	mips0				\n"	\
+		"	" insn	"				\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
+		"2:	"user_sc("$1", "%2")"			\n"	\
+		"	beqz	$1, 1b				\n"	\
+		__WEAK_LLSC_MB						\
+		"3:						\n"	\
+		"	.insn					\n"	\
+		"	.set	pop				\n"	\
+		"	.set	mips0				\n"	\
+		"	.section .fixup,\"ax\"			\n"	\
+		"4:	li	%0, %6				\n"	\
+		"	j	3b				\n"	\
+		"	.previous				\n"	\
+		"	.section __ex_table,\"a\"		\n"	\
+		"	"__UA_ADDR "\t(1b + 4), 4b		\n"	\
+		"	"__UA_ADDR "\t(2b + 0), 4b		\n"	\
+		"	.previous				\n"	\
+		: "=r" (ret), "=&r" (oldval),				\
+		  "=" GCC_OFF_SMALL_ASM() (*uaddr)				\
+		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
+		  "i" (-EFAULT)						\
+		: "memory");						\
 	} else if (cpu_has_llsc) {					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
@@ -178,6 +209,37 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
+	} else if (cpu_has_llsc && LOONGSON_LLSC_WAR) {
+		__asm__ __volatile__(
+		"# futex_atomic_cmpxchg_inatomic			\n"
+		"	.set	push					\n"
+		"	.set	noat					\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
+		"1:							\n"
+		__WEAK_LLSC_MB
+		"	"user_ll("%1", "%3")"				\n"
+		"	bne	%1, %z4, 3f				\n"
+		"	.set	mips0					\n"
+		"	move	$1, %z5					\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
+		"2:	"user_sc("$1", "%2")"				\n"
+		"	beqz	$1, 1b					\n"
+		__WEAK_LLSC_MB
+		"3:							\n"
+		"	.insn						\n"
+		"	.set	pop					\n"
+		"	.section .fixup,\"ax\"				\n"
+		"4:	li	%0, %6					\n"
+		"	j	3b					\n"
+		"	.previous					\n"
+		"	.section __ex_table,\"a\"			\n"
+		"	"__UA_ADDR "\t(1b + 4), 4b			\n"
+		"	"__UA_ADDR "\t(2b + 0), 4b			\n"
+		"	.previous					\n"
+		: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
+		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+		  "i" (-EFAULT)
+		: "memory");
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
 		"# futex_atomic_cmpxchg_inatomic			\n"
diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
index 8feaed6..a6e9d06 100644
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -44,6 +44,23 @@ static __inline__ long local_add_return(long i, local_t * l)
 		: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
 		: "Ir" (i), "m" (l->a.counter)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long temp;
+
+		__asm__ __volatile__(
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
+		"1:							\n"
+			__WEAK_LLSC_MB
+			__LL	"%1, %2		# local_add_return	\n"
+		"	addu	%0, %1, %3				\n"
+			__SC	"%0, %2					\n"
+		"	beqz	%0, 1b					\n"
+		"	addu	%0, %1, %3				\n"
+		"	.set	mips0					\n"
+		: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+		: "Ir" (i), "m" (l->a.counter)
+		: "memory");
+		smp_llsc_mb();
 	} else if (kernel_uses_llsc) {
 		unsigned long temp;
 
@@ -89,6 +106,23 @@ static __inline__ long local_sub_return(long i, local_t * l)
 		: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
 		: "Ir" (i), "m" (l->a.counter)
 		: "memory");
+	} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+		unsigned long temp;
+
+		__asm__ __volatile__(
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
+		"1:							\n"
+			__WEAK_LLSC_MB
+			__LL	"%1, %2		# local_sub_return	\n"
+		"	subu	%0, %1, %3				\n"
+			__SC	"%0, %2					\n"
+		"	beqz	%0, 1b					\n"
+		"	subu	%0, %1, %3				\n"
+		"	.set	mips0					\n"
+		: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+		: "Ir" (i), "m" (l->a.counter)
+		: "memory");
+		smp_llsc_mb();
 	} else if (kernel_uses_llsc) {
 		unsigned long temp;
 
diff --git a/arch/mips/include/asm/mach-cavium-octeon/war.h b/arch/mips/include/asm/mach-cavium-octeon/war.h
index 35c80be..1c43fb2 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/war.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/war.h
@@ -20,6 +20,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #define CAVIUM_OCTEON_DCACHE_PREFETCH_WAR	\
diff --git a/arch/mips/include/asm/mach-generic/war.h b/arch/mips/include/asm/mach-generic/war.h
index a1bc2e7..2dd9bf5 100644
--- a/arch/mips/include/asm/mach-generic/war.h
+++ b/arch/mips/include/asm/mach-generic/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MACH_GENERIC_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip22/war.h b/arch/mips/include/asm/mach-ip22/war.h
index fba6405..66ddafa 100644
--- a/arch/mips/include/asm/mach-ip22/war.h
+++ b/arch/mips/include/asm/mach-ip22/war.h
@@ -23,6 +23,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_IP22_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip27/war.h b/arch/mips/include/asm/mach-ip27/war.h
index 4ee0e4b..63ee1e5 100644
--- a/arch/mips/include/asm/mach-ip27/war.h
+++ b/arch/mips/include/asm/mach-ip27/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			1
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_IP27_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip28/war.h b/arch/mips/include/asm/mach-ip28/war.h
index 4821c7b..e455320 100644
--- a/arch/mips/include/asm/mach-ip28/war.h
+++ b/arch/mips/include/asm/mach-ip28/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			1
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_IP28_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip32/war.h b/arch/mips/include/asm/mach-ip32/war.h
index 9807ecd..2bd4caf 100644
--- a/arch/mips/include/asm/mach-ip32/war.h
+++ b/arch/mips/include/asm/mach-ip32/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	1
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_IP32_WAR_H */
diff --git a/arch/mips/include/asm/mach-loongson64/war.h b/arch/mips/include/asm/mach-loongson64/war.h
new file mode 100644
index 0000000..c5f9aaa
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson64/war.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2015, 2016 by Huacai Chen <chenhc@lemote.com>
+ */
+#ifndef __ASM_MIPS_MACH_LOONGSON64_WAR_H
+#define __ASM_MIPS_MACH_LOONGSON64_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+#define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		IS_ENABLED(CONFIG_CPU_LOONGSON3)
+#define MIPS34K_MISSED_ITLB_WAR		0
+
+#endif /* __ASM_MIPS_MACH_LOONGSON64_WAR_H */
diff --git a/arch/mips/include/asm/mach-malta/war.h b/arch/mips/include/asm/mach-malta/war.h
index d068fc4..c380825 100644
--- a/arch/mips/include/asm/mach-malta/war.h
+++ b/arch/mips/include/asm/mach-malta/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	1
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_MIPS_WAR_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h b/arch/mips/include/asm/mach-pmcs-msp71xx/war.h
index a60bf9d..8c5f396 100644
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h
+++ b/arch/mips/include/asm/mach-pmcs-msp71xx/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #if defined(CONFIG_PMC_MSP7120_EVAL) || defined(CONFIG_PMC_MSP7120_GW) || \
 	defined(CONFIG_PMC_MSP7120_FPGA)
 #define MIPS34K_MISSED_ITLB_WAR		1
diff --git a/arch/mips/include/asm/mach-rc32434/war.h b/arch/mips/include/asm/mach-rc32434/war.h
index 1bfd489a..72d2926 100644
--- a/arch/mips/include/asm/mach-rc32434/war.h
+++ b/arch/mips/include/asm/mach-rc32434/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_MIPS_WAR_H */
diff --git a/arch/mips/include/asm/mach-rm/war.h b/arch/mips/include/asm/mach-rm/war.h
index a3dde98..5683389 100644
--- a/arch/mips/include/asm/mach-rm/war.h
+++ b/arch/mips/include/asm/mach-rm/war.h
@@ -23,6 +23,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_RM_WAR_H */
diff --git a/arch/mips/include/asm/mach-sibyte/war.h b/arch/mips/include/asm/mach-sibyte/war.h
index 520f8fc..b9d7bcb 100644
--- a/arch/mips/include/asm/mach-sibyte/war.h
+++ b/arch/mips/include/asm/mach-sibyte/war.h
@@ -34,6 +34,7 @@ extern int sb1250_m3_workaround_needed(void);
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_SIBYTE_WAR_H */
diff --git a/arch/mips/include/asm/mach-tx49xx/war.h b/arch/mips/include/asm/mach-tx49xx/war.h
index a8e2c58..fd44710 100644
--- a/arch/mips/include/asm/mach-tx49xx/war.h
+++ b/arch/mips/include/asm/mach-tx49xx/war.h
@@ -19,6 +19,7 @@
 #define TX49XX_ICACHE_INDEX_INV_WAR	1
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
+#define LOONGSON_LLSC_WAR		0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
 #endif /* __ASM_MIPS_MACH_TX49XX_WAR_H */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9e9e944..d534185 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -228,6 +228,25 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 			"	.set	mips0				\n"
 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
+		} else if (kernel_uses_llsc && LOONGSON_LLSC_WAR) {
+			__asm__ __volatile__ (
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			"	.set	push				\n"
+			"	.set	noreorder			\n"
+			"1:						\n"
+				__WEAK_LLSC_MB
+				__LL	"%[tmp], %[buddy]		\n"
+			"	bnez	%[tmp], 2f			\n"
+			"	 or	%[tmp], %[tmp], %[global]	\n"
+				__SC	"%[tmp], %[buddy]		\n"
+			"	beqz	%[tmp], 1b			\n"
+			"	nop					\n"
+			"2:						\n"
+			"	.set	pop				\n"
+			"	.set	mips0				\n"
+			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
+			: [global] "r" (page_global));
+			smp_llsc_mb();
 		} else if (kernel_uses_llsc) {
 			__asm__ __volatile__ (
 			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index a8df44d..1e44b76 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -114,6 +114,41 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 		  [ticket] "=&r" (tmp),
 		  [my_ticket] "=&r" (my_ticket)
 		: [inc] "r" (inc));
+	} else if (LOONGSON_LLSC_WAR) {
+		__asm__ __volatile__ (
+		"	.set push		# arch_spin_lock	\n"
+		"	.set noreorder					\n"
+		"							\n"
+		"1:							\n"
+		__WEAK_LLSC_MB
+		"	ll	%[ticket], %[ticket_ptr]		\n"
+		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
+		"	sc	%[my_ticket], %[ticket_ptr]		\n"
+		"	beqz	%[my_ticket], 1b			\n"
+		"	 srl	%[my_ticket], %[ticket], 16		\n"
+		"	andi	%[ticket], %[ticket], 0xffff		\n"
+		"	bne	%[ticket], %[my_ticket], 4f		\n"
+		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
+		"2:							\n"
+		"	.subsection 2					\n"
+		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
+		"	sll	%[ticket], 5				\n"
+		"							\n"
+		"6:	bnez	%[ticket], 6b				\n"
+		"	 subu	%[ticket], 1				\n"
+		"							\n"
+		"	lhu	%[ticket], %[serving_now_ptr]		\n"
+		"	beq	%[ticket], %[my_ticket], 2b		\n"
+		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
+		"	b	4b					\n"
+		"	 subu	%[ticket], %[ticket], 1			\n"
+		"	.previous					\n"
+		"	.set pop					\n"
+		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
+		  [serving_now_ptr] "+m" (lock->h.serving_now),
+		  [ticket] "=&r" (tmp),
+		  [my_ticket] "=&r" (my_ticket)
+		: [inc] "r" (inc));
 	} else {
 		__asm__ __volatile__ (
 		"	.set push		# arch_spin_lock	\n"
@@ -189,6 +224,32 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 		  [my_ticket] "=&r" (tmp2),
 		  [now_serving] "=&r" (tmp3)
 		: [inc] "r" (inc));
+	} if (LOONGSON_LLSC_WAR) {
+		__asm__ __volatile__ (
+		"	.set push		# arch_spin_trylock	\n"
+		"	.set noreorder					\n"
+		"							\n"
+		"1:							\n"
+		__WEAK_LLSC_MB
+		"	ll	%[ticket], %[ticket_ptr]		\n"
+		"	srl	%[my_ticket], %[ticket], 16		\n"
+		"	andi	%[now_serving], %[ticket], 0xffff	\n"
+		"	bne	%[my_ticket], %[now_serving], 3f	\n"
+		"	 addu	%[ticket], %[ticket], %[inc]		\n"
+		"	sc	%[ticket], %[ticket_ptr]		\n"
+		"	beqz	%[ticket], 1b				\n"
+		"	 li	%[ticket], 1				\n"
+		"2:							\n"
+		"	.subsection 2					\n"
+		"3:	b	2b					\n"
+		"	 li	%[ticket], 0				\n"
+		"	.previous					\n"
+		"	.set pop					\n"
+		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
+		  [ticket] "=&r" (tmp),
+		  [my_ticket] "=&r" (tmp2),
+		  [now_serving] "=&r" (tmp3)
+		: [inc] "r" (inc));
 	} else {
 		__asm__ __volatile__ (
 		"	.set push		# arch_spin_trylock	\n"
@@ -258,6 +319,19 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
 		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
+	} else if (LOONGSON_LLSC_WAR) {
+		do {
+			__asm__ __volatile__(
+			"1:			# arch_read_lock	\n"
+			__WEAK_LLSC_MB
+			"	ll	%1, %2				\n"
+			"	bltz	%1, 1b				\n"
+			"	 addu	%1, 1				\n"
+			"2:	sc	%1, %0				\n"
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
+			: "memory");
+		} while (unlikely(!tmp));
 	} else {
 		do {
 			__asm__ __volatile__(
@@ -289,6 +363,20 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
 		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
+	} else if (LOONGSON_LLSC_WAR) {
+		do {
+			__asm__ __volatile__(
+			"1:			# arch_read_unlock	\n"
+			__WEAK_LLSC_MB
+			"	ll	%1, %2				\n"
+			"	addiu	%1, -1				\n"
+			"	sc	%1, %0				\n"
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
+			: "memory");
+		} while (unlikely(!tmp));
+
+		smp_llsc_mb();
 	} else {
 		do {
 			__asm__ __volatile__(
@@ -319,6 +407,19 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
 		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
+	} else if (LOONGSON_LLSC_WAR) {
+		do {
+			__asm__ __volatile__(
+			"1:			# arch_write_lock	\n"
+			__WEAK_LLSC_MB
+			"	ll	%1, %2				\n"
+			"	bnez	%1, 1b				\n"
+			"	 lui	%1, 0x8000			\n"
+			"2:	sc	%1, %0				\n"
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
+			: "memory");
+		} while (unlikely(!tmp));
 	} else {
 		do {
 			__asm__ __volatile__(
@@ -345,6 +446,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	: "=m" (rw->lock)
 	: "m" (rw->lock)
 	: "memory");
+
+	nudge_writes();
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
@@ -369,6 +472,27 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
 		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
+	} else if (LOONGSON_LLSC_WAR) {
+		__asm__ __volatile__(
+		"	.set	noreorder	# arch_read_trylock	\n"
+		"	li	%2, 0					\n"
+		"1:							\n"
+		__WEAK_LLSC_MB
+		"	ll	%1, %3					\n"
+		"	bltz	%1, 2f					\n"
+		"	 addu	%1, 1					\n"
+		"	sc	%1, %0					\n"
+		"	beqz	%1, 1b					\n"
+		"	 nop						\n"
+		"	.set	reorder					\n"
+		__WEAK_LLSC_MB
+		"	li	%2, 1					\n"
+		"2:							\n"
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
+		: "memory");
+
+		smp_llsc_mb();
 	} else {
 		__asm__ __volatile__(
 		"	.set	noreorder	# arch_read_trylock	\n"
@@ -413,6 +537,24 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
 		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
+	} else if (LOONGSON_LLSC_WAR) {
+		do {
+			__asm__ __volatile__(
+			__WEAK_LLSC_MB
+			"	ll	%1, %3	# arch_write_trylock	\n"
+			"	li	%2, 0				\n"
+			"	bnez	%1, 2f				\n"
+			"	lui	%1, 0x8000			\n"
+			"	sc	%1, %0				\n"
+			"	li	%2, 1				\n"
+			"2:						\n"
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp),
+			  "=&r" (ret)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
+			: "memory");
+		} while (unlikely(!tmp));
+
+		smp_llsc_mb();
 	} else {
 		do {
 			__asm__ __volatile__(
diff --git a/arch/mips/include/asm/war.h b/arch/mips/include/asm/war.h
index 9344e24..2fe696a 100644
--- a/arch/mips/include/asm/war.h
+++ b/arch/mips/include/asm/war.h
@@ -227,6 +227,14 @@
 #endif
 
 /*
+ * On the Loongson-2G/2H/3A/3B there is a bug that ll / sc and lld / scd is
+ * very weak ordering.
+ */
+#ifndef LOONGSON_LLSC_WAR
+#error Check setting of LOONGSON_LLSC_WAR for your platform
+#endif
+
+/*
  * 34K core erratum: "Problems Executing the TLBR Instruction"
  */
 #ifndef MIPS34K_MISSED_ITLB_WAR
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index f1d17ec..8480ac4 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -127,6 +127,40 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
 		  [new] "r" (new),
 		  [efault] "i" (-EFAULT)
 		: "memory");
+	} else if (cpu_has_llsc && LOONGSON_LLSC_WAR) {
+		__asm__ __volatile__ (
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
+		"	li	%[err], 0				\n"
+		"1:							\n"
+		__WEAK_LLSC_MB
+		"	ll	%[old], (%[addr])			\n"
+		"	move	%[tmp], %[new]				\n"
+		"2:	sc	%[tmp], (%[addr])			\n"
+		"	bnez	%[tmp], 4f				\n"
+		"3:							\n"
+		"	.insn						\n"
+		"	.subsection 2					\n"
+		"4:	b	1b					\n"
+		"	.previous					\n"
+		"							\n"
+		"	.section .fixup,\"ax\"				\n"
+		"5:	li	%[err], %[efault]			\n"
+		"	j	3b					\n"
+		"	.previous					\n"
+		"	.section __ex_table,\"a\"			\n"
+		"	"STR(PTR)"	(1b + 4), 5b			\n"
+		"	"STR(PTR)"	(2b + 0), 5b			\n"
+		"	.previous					\n"
+		"	.set	mips0					\n"
+		: [old] "=&r" (old),
+		  [err] "=&r" (err),
+		  [tmp] "=&r" (tmp)
+		: [addr] "r" (addr),
+		  [new] "r" (new),
+		  [efault] "i" (-EFAULT)
+		: "memory");
+
+		smp_llsc_mb();
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__ (
 		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 0fce460..3700dcf 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -23,6 +23,9 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
 endif
 
 cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
+ifneq ($(call as-option,-Wa$(comma)-mfix-loongson3-llsc,),)
+  cflags-$(CONFIG_CPU_LOONGSON3) += -Wa$(comma)-mno-fix-loongson3-llsc
+endif
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
 # as MIPS64 R2; older versions as just R1.  This leaves the possibility open
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index ed1c529..2ed9a88 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -92,6 +92,11 @@ static inline int __maybe_unused r10000_llsc_war(void)
 	return R10000_LLSC_WAR;
 }
 
+static inline int __maybe_unused loongson_llsc_war(void)
+{
+       return LOONGSON_LLSC_WAR;
+}
+
 static int use_bbit_insns(void)
 {
 	switch (current_cpu_type()) {
@@ -936,6 +941,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 		 * to mimic that here by taking a load/istream page
 		 * fault.
 		 */
+		if (loongson_llsc_war())
+			uasm_i_sync(p, 0);
 		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
 		uasm_i_jr(p, ptr);
 
@@ -1561,6 +1568,8 @@ static void build_loongson3_tlb_refill_handler(void)
 
 	if (check_for_high_segbits) {
 		uasm_l_large_segbits_fault(&l, p);
+		if (loongson_llsc_war())
+			uasm_i_sync(&p, 0);
 		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
 		uasm_i_jr(&p, K1);
 		uasm_i_nop(&p);
@@ -1661,6 +1670,8 @@ static void
 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+	if (loongson_llsc_war())
+		uasm_i_sync(p, 0);
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (cpu_has_64bits)
 		uasm_i_lld(p, pte, 0, ptr);
@@ -2242,6 +2253,8 @@ static void build_r4000_tlb_load_handler(void)
 #endif
 
 	uasm_l_nopage_tlbl(&l, p);
+	if (loongson_llsc_war())
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2297,6 +2310,8 @@ static void build_r4000_tlb_store_handler(void)
 #endif
 
 	uasm_l_nopage_tlbs(&l, p);
+	if (loongson_llsc_war())
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2353,6 +2368,8 @@ static void build_r4000_tlb_modify_handler(void)
 #endif
 
 	uasm_l_nopage_tlbm(&l, p);
+	if (loongson_llsc_war())
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-03-29  8:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-29  8:24 [PATCH V3 0/7] MIPS: Loongson: feature and performance improvements Huacai Chen
2017-03-29  8:24 ` [PATCH V3 1/8] MIPS: Loongson: Add Loongson-3A R3 basic support Huacai Chen
2017-03-29  8:24 ` [PATCH V3 2/8] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
2017-03-29  8:24 ` [PATCH V3 3/8] MIPS: Loongson: Add NMI handler support Huacai Chen
2017-03-29  8:24 ` [PATCH V3 4/8] MIPS: Loongson-3: IRQ balancing for PCI devices Huacai Chen
2017-03-29  8:24 ` [PATCH V3 5/8] MIPS: Loongson-3: support irq_set_affinity() in i8259 chip Huacai Chen
2017-03-29  8:24 ` [PATCH V3 6/8] MIPS: Loogson: Make enum loongson_cpu_type more clear Huacai Chen
2017-03-29  8:24 ` [PATCH V3 7/8] MIPS: Add __cpu_full_name[] to make CPU names more human-readable Huacai Chen
2017-03-29  8:24 ` [PATCH V3 8/8] MIPS: Loongson: Introduce and use LOONGSON_LLSC_WAR Huacai Chen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).