linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code
@ 2010-10-16 21:22 Kevin Cernekee
  2010-10-16 21:22 ` [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig Kevin Cernekee
                   ` (9 more replies)
  0 siblings, 10 replies; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: ffainelli, mbizon, linux-mips, linux-kernel

BMIPS processor cores are used in 50+ different chipsets spread across
5+ product lines.  In many cases the chipsets do not share the same
peripheral register layouts, the same register blocks, the same
interrupt controllers, the same memory maps, or much of anything else.

But, across radically different SoCs that share nothing more than the
same BMIPS CPU, a few things are still mostly constant:

SMP operations
Access to performance counters
DMA cache coherency quirks
Cache and memory bus configuration

So, it makes sense to treat each BMIPS processor type as a generic
"building block," rather than tying it to a specific SoC.  This makes it
easier to support a large number of BMIPS-based chipsets without
unnecessary duplication of code, and provides the infrastructure needed
to support BMIPS-proprietary features.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/bcm63xx/cpu.c      |   30 ++++++++++----------
 arch/mips/include/asm/cpu.h  |   23 ++++++++-------
 arch/mips/kernel/cpu-probe.c |   62 ++++++++++++++++++++++-------------------
 arch/mips/mm/tlbex.c         |   11 +++----
 4 files changed, 65 insertions(+), 61 deletions(-)

diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c
index cbb7caf..7c7e4d4 100644
--- a/arch/mips/bcm63xx/cpu.c
+++ b/arch/mips/bcm63xx/cpu.c
@@ -10,7 +10,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/mipsregs.h>
 #include <bcm63xx_cpu.h>
 #include <bcm63xx_regs.h>
 #include <bcm63xx_io.h>
@@ -296,26 +298,24 @@ void __init bcm63xx_cpu_init(void)
 	expected_cpu_id = 0;
 
 	switch (c->cputype) {
-	/*
-	 * BCM6338 as the same PrId as BCM3302 see arch/mips/kernel/cpu-probe.c
-	 */
-	case CPU_BCM3302:
-		__cpu_name[cpu] = "Broadcom BCM6338";
-		expected_cpu_id = BCM6338_CPU_ID;
-		bcm63xx_regs_base = bcm96338_regs_base;
-		bcm63xx_irqs = bcm96338_irqs;
+	case CPU_BMIPS3300:
+		if ((read_c0_prid() & 0xff00) == PRID_IMP_BMIPS3300_ALT) {
+			expected_cpu_id = BCM6348_CPU_ID;
+			bcm63xx_regs_base = bcm96348_regs_base;
+			bcm63xx_irqs = bcm96348_irqs;
+		} else {
+			__cpu_name[cpu] = "Broadcom BCM6338";
+			expected_cpu_id = BCM6338_CPU_ID;
+			bcm63xx_regs_base = bcm96338_regs_base;
+			bcm63xx_irqs = bcm96338_irqs;
+		}
 		break;
-	case CPU_BCM6345:
+	case CPU_BMIPS32:
 		expected_cpu_id = BCM6345_CPU_ID;
 		bcm63xx_regs_base = bcm96345_regs_base;
 		bcm63xx_irqs = bcm96345_irqs;
 		break;
-	case CPU_BCM6348:
-		expected_cpu_id = BCM6348_CPU_ID;
-		bcm63xx_regs_base = bcm96348_regs_base;
-		bcm63xx_irqs = bcm96348_irqs;
-		break;
-	case CPU_BCM6358:
+	case CPU_BMIPS4350:
 		expected_cpu_id = BCM6358_CPU_ID;
 		bcm63xx_regs_base = bcm96358_regs_base;
 		bcm63xx_irqs = bcm96358_irqs;
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index b201a8f..bd2033b 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -111,14 +111,16 @@
  * These are the PRID's for when 23:16 == PRID_COMP_BROADCOM
  */
 
-#define PRID_IMP_BCM4710	0x4000
-#define PRID_IMP_BCM3302	0x9000
-#define PRID_IMP_BCM6338	0x9000
-#define PRID_IMP_BCM6345	0x8000
-#define PRID_IMP_BCM6348	0x9100
-#define PRID_IMP_BCM4350	0xA000
-#define PRID_REV_BCM6358	0x0010
-#define PRID_REV_BCM6368	0x0030
+#define PRID_IMP_BMIPS4KC	0x4000
+#define PRID_IMP_BMIPS32	0x8000
+#define PRID_IMP_BMIPS3300	0x9000
+#define PRID_IMP_BMIPS3300_ALT	0x9100
+#define PRID_IMP_BMIPS3300_BUG	0x0000
+#define PRID_IMP_BMIPS43XX	0xa000
+#define PRID_IMP_BMIPS5000	0x5a00
+
+#define PRID_REV_BMIPS4380_LO	0x0040
+#define PRID_REV_BMIPS4380_HI	0x006f
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_CAVIUM
@@ -223,9 +225,8 @@ enum cpu_type_enum {
 	 * MIPS32 class processors
 	 */
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
-	CPU_ALCHEMY, CPU_PR4450, CPU_BCM3302, CPU_BCM4710,
-	CPU_BCM6338, CPU_BCM6345, CPU_BCM6348, CPU_BCM6358,
-	CPU_JZRISC,
+	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
+	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC,
 
 	/*
 	 * MIPS64 class processors
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index b1b304e..259cbfa 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -181,10 +181,10 @@ void __init check_wait(void)
 	case CPU_5KC:
 	case CPU_25KF:
 	case CPU_PR4450:
-	case CPU_BCM3302:
-	case CPU_BCM6338:
-	case CPU_BCM6348:
-	case CPU_BCM6358:
+	case CPU_BMIPS3300:
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+	case CPU_BMIPS5000:
 	case CPU_CAVIUM_OCTEON:
 	case CPU_CAVIUM_OCTEON_PLUS:
 	case CPU_JZRISC:
@@ -902,33 +902,37 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 {
 	decode_configs(c);
 	switch (c->processor_id & 0xff00) {
-	case PRID_IMP_BCM3302:
-	 /* same as PRID_IMP_BCM6338 */
-		c->cputype = CPU_BCM3302;
-		__cpu_name[cpu] = "Broadcom BCM3302";
-		break;
-	case PRID_IMP_BCM4710:
-		c->cputype = CPU_BCM4710;
-		__cpu_name[cpu] = "Broadcom BCM4710";
-		break;
-	case PRID_IMP_BCM6345:
-		c->cputype = CPU_BCM6345;
-		__cpu_name[cpu] = "Broadcom BCM6345";
+	case PRID_IMP_BMIPS32:
+		c->cputype = CPU_BMIPS32;
+		__cpu_name[cpu] = "Broadcom BMIPS32";
+		break;
+	case PRID_IMP_BMIPS3300:
+	case PRID_IMP_BMIPS3300_ALT:
+	case PRID_IMP_BMIPS3300_BUG:
+		c->cputype = CPU_BMIPS3300;
+		__cpu_name[cpu] = "Broadcom BMIPS3300";
+		break;
+	case PRID_IMP_BMIPS43XX: {
+		int rev = c->processor_id & 0xff;
+
+		if (rev >= PRID_REV_BMIPS4380_LO &&
+				rev <= PRID_REV_BMIPS4380_HI) {
+			c->cputype = CPU_BMIPS4380;
+			__cpu_name[cpu] = "Broadcom BMIPS4380";
+		} else {
+			c->cputype = CPU_BMIPS4350;
+			__cpu_name[cpu] = "Broadcom BMIPS4350";
+		}
 		break;
-	case PRID_IMP_BCM6348:
-		c->cputype = CPU_BCM6348;
-		__cpu_name[cpu] = "Broadcom BCM6348";
+	}
+	case PRID_IMP_BMIPS5000:
+		c->cputype = CPU_BMIPS5000;
+		__cpu_name[cpu] = "Broadcom BMIPS5000";
+		c->options |= MIPS_CPU_ULRI;
 		break;
-	case PRID_IMP_BCM4350:
-		switch (c->processor_id & 0xf0) {
-		case PRID_REV_BCM6358:
-			c->cputype = CPU_BCM6358;
-			__cpu_name[cpu] = "Broadcom BCM6358";
-			break;
-		default:
-			c->cputype = CPU_UNKNOWN;
-			break;
-		}
+	case PRID_IMP_BMIPS4KC:
+		c->cputype = CPU_4KC;
+		__cpu_name[cpu] = "MIPS 4Kc";
 		break;
 	}
 }
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 4510e61..93816f3 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -338,13 +338,12 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_4KSC:
 	case CPU_20KC:
 	case CPU_25KF:
-	case CPU_BCM3302:
-	case CPU_BCM4710:
+	case CPU_BMIPS32:
+	case CPU_BMIPS3300:
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+	case CPU_BMIPS5000:
 	case CPU_LOONGSON2:
-	case CPU_BCM6338:
-	case CPU_BCM6345:
-	case CPU_BCM6348:
-	case CPU_BCM6358:
 	case CPU_R5500:
 		if (m4kc_tlbp_war())
 			uasm_i_nop(p);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-17 17:01   ` Florian Fainelli
  2010-10-16 21:22 ` [PATCH 3/9] MIPS: Add BMIPS CP0 register definitions Kevin Cernekee
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

Add processor feature definitions for BMIPS3300, BMIPS4350, BMIPS4380,
and BMIPS5000.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/Kconfig |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5526faa..1403926 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1332,6 +1332,57 @@ config CPU_CAVIUM_OCTEON
 	  can have up to 16 Mips64v2 cores and 8 integrated gigabit ethernets.
 	  Full details can be found at http://www.caviumnetworks.com.
 
+config CPU_BMIPS3300
+	bool "BMIPS3300"
+	depends on SYS_HAS_CPU_BMIPS3300
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select WEAK_ORDERING
+	help
+	  Broadcom BMIPS3300 processors.
+
+config CPU_BMIPS4350
+	bool "BMIPS4350"
+	depends on SYS_HAS_CPU_BMIPS4350
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
+	select WEAK_ORDERING
+	help
+	  Broadcom BMIPS4350 processors.
+
+config CPU_BMIPS4380
+	bool "BMIPS4380"
+	depends on SYS_HAS_CPU_BMIPS4380
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
+	select WEAK_ORDERING
+	help
+	  Broadcom BMIPS4380 processors.
+
+config CPU_BMIPS5000
+	bool "BMIPS5000"
+	depends on SYS_HAS_CPU_BMIPS5000
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
+	select WEAK_ORDERING
+	help
+	  Broadcom BMIPS5000 processors.
+
 endchoice
 
 if CPU_LOONGSON2F
@@ -1450,6 +1501,18 @@ config SYS_HAS_CPU_SB1
 config SYS_HAS_CPU_CAVIUM_OCTEON
 	bool
 
+config SYS_HAS_CPU_BMIPS3300
+	bool
+
+config SYS_HAS_CPU_BMIPS4350
+	bool
+
+config SYS_HAS_CPU_BMIPS4380
+	bool
+
+config SYS_HAS_CPU_BMIPS5000
+	bool
+
 #
 # CPU may reorder R->R, R->W, W->R, W->W
 # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 3/9] MIPS: Add BMIPS CP0 register definitions
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
  2010-10-16 21:22 ` [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-20  7:23   ` Ralf Baechle
  2010-10-16 21:22 ` [PATCH 4/9] MIPS: Install handlers for software IRQs Kevin Cernekee
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/include/asm/mipsregs.h |   51 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 335474c..4d98709 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1040,6 +1040,12 @@ do {									\
 #define read_c0_dtaglo()	__read_32bit_c0_register($28, 2)
 #define write_c0_dtaglo(val)	__write_32bit_c0_register($28, 2, val)
 
+#define read_c0_ddatalo()	__read_32bit_c0_register($28, 3)
+#define write_c0_ddatalo(val)	__write_32bit_c0_register($28, 3, val)
+
+#define read_c0_staglo()	__read_32bit_c0_register($28, 4)
+#define write_c0_staglo(val)	__write_32bit_c0_register($28, 4, val)
+
 #define read_c0_taghi()		__read_32bit_c0_register($29, 0)
 #define write_c0_taghi(val)	__write_32bit_c0_register($29, 0, val)
 
@@ -1082,6 +1088,51 @@ do {									\
 #define read_octeon_c0_dcacheerr()	__read_64bit_c0_register($27, 1)
 #define write_octeon_c0_dcacheerr(val)	__write_64bit_c0_register($27, 1, val)
 
+/* BMIPS3300 */
+#define read_c0_brcm_config_0()		__read_32bit_c0_register($22, 0)
+#define write_c0_brcm_config_0(val)	__write_32bit_c0_register($22, 0, val)
+
+#define read_c0_brcm_bus_pll()		__read_32bit_c0_register($22, 4)
+#define write_c0_brcm_bus_pll(val)	__write_32bit_c0_register($22, 4, val)
+
+#define read_c0_brcm_reset()		__read_32bit_c0_register($22, 5)
+#define write_c0_brcm_reset(val)	__write_32bit_c0_register($22, 5, val)
+
+/* BMIPS4380 */
+#define read_c0_brcm_cmt_intr()		__read_32bit_c0_register($22, 1)
+#define write_c0_brcm_cmt_intr(val)	__write_32bit_c0_register($22, 1, val)
+
+#define read_c0_brcm_cmt_ctrl()		__read_32bit_c0_register($22, 2)
+#define write_c0_brcm_cmt_ctrl(val)	__write_32bit_c0_register($22, 2, val)
+
+#define read_c0_brcm_cmt_local()	__read_32bit_c0_register($22, 3)
+#define write_c0_brcm_cmt_local(val)	__write_32bit_c0_register($22, 3, val)
+
+#define read_c0_brcm_config_1()		__read_32bit_c0_register($22, 5)
+#define write_c0_brcm_config_1(val)	__write_32bit_c0_register($22, 5, val)
+
+#define read_c0_brcm_cbr()		__read_32bit_c0_register($22, 6)
+#define write_c0_brcm_cbr(val)		__write_32bit_c0_register($22, 6, val)
+
+/* BMIPS5000 */
+#define read_c0_brcm_config()		__read_32bit_c0_register($22, 0)
+#define write_c0_brcm_config(val)	__write_32bit_c0_register($22, 0, val)
+
+#define read_c0_brcm_mode()		__read_32bit_c0_register($22, 1)
+#define write_c0_brcm_mode(val)		__write_32bit_c0_register($22, 1, val)
+
+#define read_c0_brcm_action()		__read_32bit_c0_register($22, 2)
+#define write_c0_brcm_action(val)	__write_32bit_c0_register($22, 2, val)
+
+#define read_c0_brcm_edsp()		__read_32bit_c0_register($22, 3)
+#define write_c0_brcm_edsp(val)		__write_32bit_c0_register($22, 3, val)
+
+#define read_c0_brcm_bootvec()		__read_32bit_c0_register($22, 4)
+#define write_c0_brcm_bootvec(val)	__write_32bit_c0_register($22, 4, val)
+
+#define read_c0_brcm_sleepcount()	__read_32bit_c0_register($22, 7)
+#define write_c0_brcm_sleepcount(val)	__write_32bit_c0_register($22, 7, val)
+
 /*
  * Macros to access the floating point coprocessor control registers
  */
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 4/9] MIPS: Install handlers for software IRQs
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
  2010-10-16 21:22 ` [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig Kevin Cernekee
  2010-10-16 21:22 ` [PATCH 3/9] MIPS: Add BMIPS CP0 register definitions Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-21 14:44   ` Ralf Baechle
  2011-05-19 12:31   ` Ralf Baechle
  2010-10-16 21:22 ` [PATCH resend 5/9] MIPS: sync after cacheflush Kevin Cernekee
                   ` (6 subsequent siblings)
  9 siblings, 2 replies; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

BMIPS4350/4380/5000 CMT/SMT all use SW INT0/INT1 for inter-thread
signaling.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/kernel/irq_cpu.c |   14 ++++++--------
 1 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/mips/kernel/irq_cpu.c b/arch/mips/kernel/irq_cpu.c
index 55c8a3c..436bb2d 100644
--- a/arch/mips/kernel/irq_cpu.c
+++ b/arch/mips/kernel/irq_cpu.c
@@ -106,14 +106,12 @@ void __init mips_cpu_irq_init(void)
 	clear_c0_status(ST0_IM);
 	clear_c0_cause(CAUSEF_IP);
 
-	/*
-	 * Only MT is using the software interrupts currently, so we just
-	 * leave them uninitialized for other processors.
-	 */
-	if (cpu_has_mipsmt)
-		for (i = irq_base; i < irq_base + 2; i++)
-			set_irq_chip_and_handler(i, &mips_mt_cpu_irq_controller,
-						 handle_percpu_irq);
+	/* Software interrupts are used for MT/CMT IPI */
+	for (i = irq_base; i < irq_base + 2; i++)
+		set_irq_chip_and_handler(i, cpu_has_mipsmt ?
+					 &mips_mt_cpu_irq_controller :
+					 &mips_cpu_irq_controller,
+					 handle_percpu_irq);
 
 	for (i = irq_base + 2; i < irq_base + 8; i++)
 		set_irq_chip_and_handler(i, &mips_cpu_irq_controller,
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (2 preceding siblings ...)
  2010-10-16 21:22 ` [PATCH 4/9] MIPS: Install handlers for software IRQs Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-18 13:44   ` Shinya Kuribayashi
  2010-10-16 21:22 ` [PATCH resend 6/9] MIPS: pfn_valid() is broken on low memory HIGHMEM systems Kevin Cernekee
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

On processors with deep write buffers, it is likely that many cycles
will pass between a CACHE instruction and the time the data actually
gets written out to DRAM.  Add a SYNC instruction to ensure that the
buffers get emptied before the flush functions return.

Actual problem seen in the wild:

1) dma_alloc_coherent() allocates cached memory

2) memset() is called to clear the new pages

3) dma_cache_wback_inv() is called to flush the zero data out to memory

4) dma_alloc_coherent() returns an uncached (kseg1) pointer to the
freshly allocated pages

5) Caller writes data through the kseg1 pointer

6) Buffered writeback data finally gets flushed out to DRAM

7) Part of caller's data is inexplicably zeroed out

This patch adds SYNC between steps 3 and 4, which fixed the problem.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/mm/c-r4k.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 6721ee2..05c3de3 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -605,6 +605,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 			r4k_blast_scache();
 		else
 			blast_scache_range(addr, addr + size);
+		__sync();
 		return;
 	}
 
@@ -621,6 +622,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	}
 
 	bc_wback_inv(addr, size);
+	__sync();
 }
 
 static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
@@ -648,6 +650,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 				 (addr + size - 1) & almask);
 			blast_inv_scache_range(addr, addr + size);
 		}
+		__sync();
 		return;
 	}
 
@@ -664,6 +667,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 	}
 
 	bc_inv(addr, size);
+	__sync();
 }
 #endif /* CONFIG_DMA_NONCOHERENT */
 
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH resend 6/9] MIPS: pfn_valid() is broken on low memory HIGHMEM systems
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (3 preceding siblings ...)
  2010-10-16 21:22 ` [PATCH resend 5/9] MIPS: sync after cacheflush Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-16 21:22 ` [PATCH v2 resend 7/9] MIPS: Move FIXADDR_TOP into spaces.h Kevin Cernekee
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

pfn_valid() compares the PFN to max_mapnr:

        __pfn >= min_low_pfn && __pfn < max_mapnr;

On HIGHMEM kernels, highend_pfn is used to set the value of max_mapnr.
Unfortunately, highend_pfn is left at zero if the system does not
actually have enough RAM to reach into the HIGHMEM range.  This causes
pfn_valid() to always return false, and when debug checks are enabled
the kernel will fail catastrophically:

Memory: 22432k/32768k available (2249k kernel code, 10336k reserved, 653k data, 1352k init, 0k highmem)
NR_IRQS:128
kfree_debugcheck: out of range ptr 81c02900h.
Kernel bug detected[#1]:
Cpu 0
$ 0   : 00000000 10008400 00000034 00000000
$ 4   : 8003e160 802a0000 8003e160 00000000
$ 8   : 00000000 0000003e 00000747 00000747
...

On such a configuration, max_low_pfn should be used to set max_mapnr.

This was seen on 2.6.34.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/mm/init.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd2..18183a4 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -370,7 +370,7 @@ void __init mem_init(void)
 #ifdef CONFIG_DISCONTIGMEM
 #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
 #endif
-	max_mapnr = highend_pfn;
+	max_mapnr = highend_pfn ? : max_low_pfn;
 #else
 	max_mapnr = max_low_pfn;
 #endif
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v2 resend 7/9] MIPS: Move FIXADDR_TOP into spaces.h
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (4 preceding siblings ...)
  2010-10-16 21:22 ` [PATCH resend 6/9] MIPS: pfn_valid() is broken on low memory HIGHMEM systems Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-16 21:22 ` [PATCH resend 8/9] MIPS: Honor L2 bypass bit Kevin Cernekee
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

Memory maps and addressing quirks are normally defined in <spaces.h>.
There are already three targets that need to override FIXADDR_TOP, and
others exist.  This will be a cleaner approach than adding lots of
ifdefs in fixmap.h.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/include/asm/fixmap.h              |   10 +---------
 arch/mips/include/asm/mach-bcm63xx/spaces.h |   17 +++++++++++++++++
 arch/mips/include/asm/mach-generic/spaces.h |    4 ++++
 arch/mips/include/asm/mach-tx39xx/spaces.h  |   17 +++++++++++++++++
 arch/mips/include/asm/mach-tx49xx/spaces.h  |   17 +++++++++++++++++
 5 files changed, 56 insertions(+), 9 deletions(-)
 create mode 100644 arch/mips/include/asm/mach-bcm63xx/spaces.h
 create mode 100644 arch/mips/include/asm/mach-tx39xx/spaces.h
 create mode 100644 arch/mips/include/asm/mach-tx49xx/spaces.h

diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h
index 0b89b83..98bcc98 100644
--- a/arch/mips/include/asm/fixmap.h
+++ b/arch/mips/include/asm/fixmap.h
@@ -14,6 +14,7 @@
 #define _ASM_FIXMAP_H
 
 #include <asm/page.h>
+#include <spaces.h>
 #ifdef CONFIG_HIGHMEM
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
@@ -67,15 +68,6 @@ enum fixed_addresses {
  * the start of the fixmap, and leave one page empty
  * at the top of mem..
  */
-#ifdef CONFIG_BCM63XX
-#define FIXADDR_TOP     ((unsigned long)(long)(int)0xff000000)
-#else
-#if defined(CONFIG_CPU_TX39XX) || defined(CONFIG_CPU_TX49XX)
-#define FIXADDR_TOP	((unsigned long)(long)(int)(0xff000000 - 0x20000))
-#else
-#define FIXADDR_TOP	((unsigned long)(long)(int)0xfffe0000)
-#endif
-#endif
 #define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
 
diff --git a/arch/mips/include/asm/mach-bcm63xx/spaces.h b/arch/mips/include/asm/mach-bcm63xx/spaces.h
new file mode 100644
index 0000000..61e750f
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/spaces.h
@@ -0,0 +1,17 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle
+ * Copyright (C) 2000, 2002  Maciej W. Rozycki
+ * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc.
+ */
+#ifndef _ASM_BCM63XX_SPACES_H
+#define _ASM_BCM63XX_SPACES_H
+
+#define FIXADDR_TOP		((unsigned long)(long)(int)0xff000000)
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_BCM63XX_SPACES_H */
diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h
index c9fa4b1..d7a9efd 100644
--- a/arch/mips/include/asm/mach-generic/spaces.h
+++ b/arch/mips/include/asm/mach-generic/spaces.h
@@ -82,4 +82,8 @@
 #define PAGE_OFFSET		(CAC_BASE + PHYS_OFFSET)
 #endif
 
+#ifndef FIXADDR_TOP
+#define FIXADDR_TOP		((unsigned long)(long)(int)0xfffe0000)
+#endif
+
 #endif /* __ASM_MACH_GENERIC_SPACES_H */
diff --git a/arch/mips/include/asm/mach-tx39xx/spaces.h b/arch/mips/include/asm/mach-tx39xx/spaces.h
new file mode 100644
index 0000000..151fe7a
--- /dev/null
+++ b/arch/mips/include/asm/mach-tx39xx/spaces.h
@@ -0,0 +1,17 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle
+ * Copyright (C) 2000, 2002  Maciej W. Rozycki
+ * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc.
+ */
+#ifndef _ASM_TX39XX_SPACES_H
+#define _ASM_TX39XX_SPACES_H
+
+#define FIXADDR_TOP		((unsigned long)(long)(int)0xfefe0000)
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_TX39XX_SPACES_H */
diff --git a/arch/mips/include/asm/mach-tx49xx/spaces.h b/arch/mips/include/asm/mach-tx49xx/spaces.h
new file mode 100644
index 0000000..0cb10a6
--- /dev/null
+++ b/arch/mips/include/asm/mach-tx49xx/spaces.h
@@ -0,0 +1,17 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle
+ * Copyright (C) 2000, 2002  Maciej W. Rozycki
+ * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc.
+ */
+#ifndef _ASM_TX49XX_SPACES_H
+#define _ASM_TX49XX_SPACES_H
+
+#define FIXADDR_TOP		((unsigned long)(long)(int)0xfefe0000)
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_TX49XX_SPACES_H */
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH resend 8/9] MIPS: Honor L2 bypass bit
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (5 preceding siblings ...)
  2010-10-16 21:22 ` [PATCH v2 resend 7/9] MIPS: Move FIXADDR_TOP into spaces.h Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-19 16:16   ` Ralf Baechle
  2010-10-16 21:22 ` [PATCH resend 9/9] MIPS: Allow UserLocal on MIPS_R1 processors Kevin Cernekee
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

If CP0 CONFIG2 bit 12 (L2B) is set, the L2 cache is disabled and
therefore Linux should not attempt to use it.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/mm/sc-mips.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 5ab5fa8..d072b25 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -79,6 +79,11 @@ static inline int __init mips_sc_probe(void)
 		return 0;
 
 	config2 = read_c0_config2();
+
+	/* bypass bit */
+	if (config2 & (1 << 12))
+		return 0;
+
 	tmp = (config2 >> 4) & 0x0f;
 	if (0 < tmp && tmp <= 7)
 		c->scache.linesz = 2 << tmp;
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH resend 9/9] MIPS: Allow UserLocal on MIPS_R1 processors
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (6 preceding siblings ...)
  2010-10-16 21:22 ` [PATCH resend 8/9] MIPS: Honor L2 bypass bit Kevin Cernekee
@ 2010-10-16 21:22 ` Kevin Cernekee
  2010-10-21 14:32   ` Ralf Baechle
  2010-10-17 16:59 ` [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Florian Fainelli
  2010-10-20  7:19 ` Ralf Baechle
  9 siblings, 1 reply; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-16 21:22 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips, linux-kernel

Some MIPS32R1 processors implement UserLocal (RDHWR $29) to accelerate
programs that make extensive use of thread-local storage.  Therefore,
setting up the HWRENA register should not depend on cpu_has_mips_r2.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
---
 arch/mips/kernel/traps.c |   13 +++++++------
 1 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 03ec001..ec6cbd2 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1469,6 +1469,7 @@ void __cpuinit per_cpu_trap_init(void)
 {
 	unsigned int cpu = smp_processor_id();
 	unsigned int status_set = ST0_CU0;
+	unsigned int hwrena = cpu_hwrena_impl_bits;
 #ifdef CONFIG_MIPS_MT_SMTC
 	int secondaryTC = 0;
 	int bootTC = (cpu == 0);
@@ -1501,14 +1502,14 @@ void __cpuinit per_cpu_trap_init(void)
 	change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX,
 			 status_set);
 
-	if (cpu_has_mips_r2) {
-		unsigned int enable = 0x0000000f | cpu_hwrena_impl_bits;
+	if (cpu_has_mips_r2)
+		hwrena |= 0x0000000f;
 
-		if (!noulri && cpu_has_userlocal)
-			enable |= (1 << 29);
+	if (!noulri && cpu_has_userlocal)
+		hwrena |= (1 << 29);
 
-		write_c0_hwrena(enable);
-	}
+	if (hwrena)
+		write_c0_hwrena(hwrena);
 
 #ifdef CONFIG_MIPS_MT_SMTC
 	if (!secondaryTC) {
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (7 preceding siblings ...)
  2010-10-16 21:22 ` [PATCH resend 9/9] MIPS: Allow UserLocal on MIPS_R1 processors Kevin Cernekee
@ 2010-10-17 16:59 ` Florian Fainelli
  2010-10-20  7:19 ` Ralf Baechle
  9 siblings, 0 replies; 36+ messages in thread
From: Florian Fainelli @ 2010-10-17 16:59 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Ralf Baechle, mbizon, linux-mips, linux-kernel

Hello Kevin,

Le Saturday 16 October 2010 23:22:30, Kevin Cernekee a écrit :
> BMIPS processor cores are used in 50+ different chipsets spread across
> 5+ product lines.  In many cases the chipsets do not share the same
> peripheral register layouts, the same register blocks, the same
> interrupt controllers, the same memory maps, or much of anything else.
> 
> But, across radically different SoCs that share nothing more than the
> same BMIPS CPU, a few things are still mostly constant:
> 
> SMP operations
> Access to performance counters
> DMA cache coherency quirks
> Cache and memory bus configuration
> 
> So, it makes sense to treat each BMIPS processor type as a generic
> "building block," rather than tying it to a specific SoC.  This makes it
> easier to support a large number of BMIPS-based chipsets without
> unnecessary duplication of code, and provides the infrastructure needed
> to support BMIPS-proprietary features.
> 
> Signed-off-by: Kevin Cernekee <cernekee@gmail.com>

I boot tested all of your nine patches on a BCM6348 system without problems.

Tested-by: Florian Fainelli <ffainelli@freebox.fr>
--
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig
  2010-10-16 21:22 ` [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig Kevin Cernekee
@ 2010-10-17 17:01   ` Florian Fainelli
  0 siblings, 0 replies; 36+ messages in thread
From: Florian Fainelli @ 2010-10-17 17:01 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Ralf Baechle, linux-mips, linux-kernel

Hello Kevin,

Le Saturday 16 October 2010 23:22:31, Kevin Cernekee a écrit :
> Add processor feature definitions for BMIPS3300, BMIPS4350, BMIPS4380,
> and BMIPS5000.
> 
> Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
> ---
>  arch/mips/Kconfig |   63
> +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 63
> insertions(+), 0 deletions(-)
> 
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 5526faa..1403926 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -1332,6 +1332,57 @@ config CPU_CAVIUM_OCTEON
>  	  can have up to 16 Mips64v2 cores and 8 integrated gigabit ethernets.
>  	  Full details can be found at http://www.caviumnetworks.com.
> 
> +config CPU_BMIPS3300
> +	bool "BMIPS3300"
> +	depends on SYS_HAS_CPU_BMIPS3300
> +	select DMA_NONCOHERENT
> +	select IRQ_CPU
> +	select SWAP_IO_SPACE
> +	select SYS_SUPPORTS_32BIT_KERNEL
> +	select WEAK_ORDERING
> +	help
> +	  Broadcom BMIPS3300 processors.
> +
> +config CPU_BMIPS4350
> +	bool "BMIPS4350"
> +	depends on SYS_HAS_CPU_BMIPS4350
> +	select CPU_SUPPORTS_32BIT_KERNEL
> +	select DMA_NONCOHERENT
> +	select IRQ_CPU
> +	select SWAP_IO_SPACE
> +	select SYS_SUPPORTS_SMP
> +	select SYS_SUPPORTS_HOTPLUG_CPU
> +	select WEAK_ORDERING
> +	help
> +	  Broadcom BMIPS4350 processors.

May I suggest including the marketing name "Viper" here to help people know where this CPU block can be found?

> +
> +config CPU_BMIPS4380
> +	bool "BMIPS4380"
> +	depends on SYS_HAS_CPU_BMIPS4380
> +	select CPU_SUPPORTS_32BIT_KERNEL
> +	select DMA_NONCOHERENT
> +	select IRQ_CPU
> +	select SWAP_IO_SPACE
> +	select SYS_SUPPORTS_SMP
> +	select SYS_SUPPORTS_HOTPLUG_CPU
> +	select WEAK_ORDERING
> +	help
> +	  Broadcom BMIPS4380 processors.
> +
> +config CPU_BMIPS5000
> +	bool "BMIPS5000"
> +	depends on SYS_HAS_CPU_BMIPS5000
> +	select CPU_SUPPORTS_32BIT_KERNEL
> +	select CPU_SUPPORTS_HIGHMEM
> +	select DMA_NONCOHERENT
> +	select IRQ_CPU
> +	select SWAP_IO_SPACE
> +	select SYS_SUPPORTS_SMP
> +	select SYS_SUPPORTS_HOTPLUG_CPU
> +	select WEAK_ORDERING
> +	help
> +	  Broadcom BMIPS5000 processors.
> +
>  endchoice
> 
>  if CPU_LOONGSON2F
> @@ -1450,6 +1501,18 @@ config SYS_HAS_CPU_SB1
>  config SYS_HAS_CPU_CAVIUM_OCTEON
>  	bool
> 
> +config SYS_HAS_CPU_BMIPS3300
> +	bool
> +
> +config SYS_HAS_CPU_BMIPS4350
> +	bool
> +
> +config SYS_HAS_CPU_BMIPS4380
> +	bool
> +
> +config SYS_HAS_CPU_BMIPS5000
> +	bool
> +
>  #
>  # CPU may reorder R->R, R->W, W->R, W->W
>  # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-16 21:22 ` [PATCH resend 5/9] MIPS: sync after cacheflush Kevin Cernekee
@ 2010-10-18 13:44   ` Shinya Kuribayashi
  2010-10-18 18:34     ` Kevin Cernekee
  2010-10-18 19:19     ` Ralf Baechle
  0 siblings, 2 replies; 36+ messages in thread
From: Shinya Kuribayashi @ 2010-10-18 13:44 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Ralf Baechle, linux-mips, linux-kernel

On 10/17/10 6:22 AM, Kevin Cernekee wrote:
> On processors with deep write buffers, it is likely that many cycles
> will pass between a CACHE instruction and the time the data actually
> gets written out to DRAM.  Add a SYNC instruction to ensure that the
> buffers get emptied before the flush functions return.
>
> Actual problem seen in the wild:
>
> 1) dma_alloc_coherent() allocates cached memory
>
> 2) memset() is called to clear the new pages
>
> 3) dma_cache_wback_inv() is called to flush the zero data out to memory
>
> 4) dma_alloc_coherent() returns an uncached (kseg1) pointer to the
> freshly allocated pages
>
> 5) Caller writes data through the kseg1 pointer
>
> 6) Buffered writeback data finally gets flushed out to DRAM
>
> 7) Part of caller's data is inexplicably zeroed out
>
> This patch adds SYNC between steps 3 and 4, which fixed the problem.
>
> Signed-off-by: Kevin Cernekee<cernekee@gmail.com>
> ---
>   arch/mips/mm/c-r4k.c |    4 ++++
>   1 files changed, 4 insertions(+), 0 deletions(-)
>
> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
> index 6721ee2..05c3de3 100644
> --- a/arch/mips/mm/c-r4k.c
> +++ b/arch/mips/mm/c-r4k.c
> @@ -605,6 +605,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
>   			r4k_blast_scache();
>   		else
>   			blast_scache_range(addr, addr + size);
> +		__sync();
>   		return;
>   	}
>

Basically, agreed.  I have similar workarounds when initiating DMA,
where we need to flush out data to DRAM before starting DMA trans-
actions.  Looks like similar situations.

But I have a concern.

I suspect that SYNC insn alone is still not enough, isn't it?  In
such systems, where the write buffer is that 'deep' and data incoherency
is visibly observed, there still may be data write transactions floating
in the internal bus system.

To make sure that all data (data inside processor's write buffer and
data floating in the internal bus system) has reached DRAM, we need the
following three steps:

1. Flush data cache
2. Uncached, dummy load operation from _DRAM_ (not somewhere else)
3. then SYNC instruction

With these steps, data in write buffer will be pushed out of the
processor's write buffer, wait for uncached load operation to be
completed, and then finally the pipeline gets cleared.  Thoughts?

   Shinya

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 13:44   ` Shinya Kuribayashi
@ 2010-10-18 18:34     ` Kevin Cernekee
  2010-10-19  0:03       ` Shinya Kuribayashi
  2010-10-19  0:57       ` Maciej W. Rozycki
  2010-10-18 19:19     ` Ralf Baechle
  1 sibling, 2 replies; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-18 18:34 UTC (permalink / raw)
  To: Shinya Kuribayashi; +Cc: Ralf Baechle, linux-mips, linux-kernel

On Mon, Oct 18, 2010 at 6:44 AM, Shinya Kuribayashi <skuribay@pobox.com> wrote:
> I suspect that SYNC insn alone is still not enough, insn't it?  In
> such systems with that 'deep' write buffer and data incoherency is
> visibly observed, there sill may be data write transactions floating
> in the internal bus system.
>
> To make sure that all data (data inside processor's write buffer and
> data floating in the internal bus system), we need the following
> three steps:
>
> 1. Flush data cache
> 2. Uncached, dummy load operation from _DRAM_ (not somewhere else)
> 3. then SYNC instruction

Some systems do require additional steps along those lines, e.g.

# ifdef CONFIG_SGI_IP28
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"sync\n\t"			\
		"lw	$0,%0\n\t"		\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")

Maybe it would be better to use iob() instead of __sync() directly, so
that it is easy to add extra steps for the CPUs that need them.  DEC
and Loongson have custom __wbflush() implementations, and something
similar could be added for your processor to implement the uncached
dummy load.

What do you think?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 13:44   ` Shinya Kuribayashi
  2010-10-18 18:34     ` Kevin Cernekee
@ 2010-10-18 19:19     ` Ralf Baechle
  2010-10-18 19:41       ` Kevin Cernekee
  1 sibling, 1 reply; 36+ messages in thread
From: Ralf Baechle @ 2010-10-18 19:19 UTC (permalink / raw)
  To: Shinya Kuribayashi; +Cc: Kevin Cernekee, linux-mips, linux-kernel

On Mon, Oct 18, 2010 at 10:44:46PM +0900, Shinya Kuribayashi wrote:

> I suspect that SYNC insn alone is still not enough, insn't it?  In
> such systems with that 'deep' write buffer and data incoherency is
> visibly observed, there sill may be data write transactions floating
> in the internal bus system.

A SYNC in theory should ensure global visibility of preceding writes and
completion of earlier reads.  That usually works between CPUs but not
all I/O systems fully participate in that "consistency domain" so more
or less arbitrary shaking of the I/O system may still be required to
achieve consistency.

> To make sure that all data (data inside processor's write buffer and
> data floating in the internal bus system), we need the following
> three steps:
> 
> 1. Flush data cache
> 2. Uncached, dummy load operation from _DRAM_ (not somewhere else)
> 3. then SYNC instruction
> 
> With these steps, data in write buffer will be pushed out of the
> processor's write buffer, wait for uncached load operation to be
> completed, and then finally the pipeline gets cleared.  Thoughts?

I'm trying to get a statement from the MIPS architecture guys if the
necessity to do anything beyond a cache flush is an architecture violation.

Don't worry, I'm not going to refuse patches for something just because
it's not complying to a piece of paper as long as the silicon is in the
wild.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 19:19     ` Ralf Baechle
@ 2010-10-18 19:41       ` Kevin Cernekee
  2010-10-18 22:50         ` Ralf Baechle
  2010-10-19  8:54         ` Gleb O. Raiko
  0 siblings, 2 replies; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-18 19:41 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Shinya Kuribayashi, linux-mips, linux-kernel

On Mon, Oct 18, 2010 at 12:19 PM, Ralf Baechle <ralf@linux-mips.org> wrote:
> I'm trying to get a statement from the MIPS architecture guys if the
> necessity to do anything beyond a cache flush is an architecture violation.

IMO such a requirement would be unnecessarily strict.  Larger flushes
(e.g. page at a time) tend to benefit from some form of pipelining or
write gathering.  Forcing the processor to flush exactly 32 bytes at a
time, synchronously, could really slow things down and thrash the
memory controller.

I have not been able to find any official statement from MIPS that
says that CACHE + SYNC should be used, but that seems like the most
intuitive way to implement things on the hardware side.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 19:41       ` Kevin Cernekee
@ 2010-10-18 22:50         ` Ralf Baechle
  2010-10-19  0:45           ` Maciej W. Rozycki
  2010-10-19  8:54         ` Gleb O. Raiko
  1 sibling, 1 reply; 36+ messages in thread
From: Ralf Baechle @ 2010-10-18 22:50 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Shinya Kuribayashi, linux-mips, linux-kernel

On Mon, Oct 18, 2010 at 12:41:20PM -0700, Kevin Cernekee wrote:

> On Mon, Oct 18, 2010 at 12:19 PM, Ralf Baechle <ralf@linux-mips.org> wrote:
> > I'm trying to get a statement from the MIPS architecture guys if the
> > necessity to do anything beyond a cache flush is an architecture violation.
> 
> IMO such a requirement would be unnecessarily strict.  Larger flushes
> (e.g. page at a time) tend to benefit from some form of pipelining or
> write gathering.  Forcing the processor to flush exactly 32 bytes at a
> time, synchronously, could really slow things down and thrash the
> memory controller.
> 
> I have not been able to find any official statement from MIPS that
> says that CACHE + SYNC should be used, but that seems like the most
> intuitive way to implement things on the hardware side.

I agree with you but I seem to remember having read something that suggests
otherwise.  Oh well, maybe it's just something in the Cambridge water
that makes me hallucinate ;)

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 18:34     ` Kevin Cernekee
@ 2010-10-19  0:03       ` Shinya Kuribayashi
  2010-10-19  0:51         ` Kevin Cernekee
  2010-10-19  0:57       ` Maciej W. Rozycki
  1 sibling, 1 reply; 36+ messages in thread
From: Shinya Kuribayashi @ 2010-10-19  0:03 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Shinya Kuribayashi, Ralf Baechle, linux-mips, linux-kernel

On 10/19/2010 3:34 AM, Kevin Cernekee wrote:
> On Mon, Oct 18, 2010 at 6:44 AM, Shinya Kuribayashi <skuribay@pobox.com> wrote:
>> I suspect that SYNC insn alone is still not enough, insn't it?  In
>> such systems with that 'deep' write buffer and data incoherency is
>> visibly observed, there sill may be data write transactions floating
>> in the internal bus system.
>>
>> To make sure that all data (data inside processor's write buffer and
>> data floating in the internal bus system), we need the following
>> three steps:
>>
>> 1. Flush data cache
>> 2. Uncached, dummy load operation from _DRAM_ (not somewhere else)
>> 3. then SYNC instruction
> 
> Some systems do require additional steps along those lines, e.g.
> 
> # ifdef CONFIG_SGI_IP28
> #  define fast_iob()				\
> 	__asm__ __volatile__(			\
> 		".set	push\n\t"		\
> 		".set	noreorder\n\t"		\
> 		"lw	$0,%0\n\t"		\
> 		"sync\n\t"			\
> 		"lw	$0,%0\n\t"		\
> 		".set	pop"			\
> 		: /* no output */		\
> 		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
> 		: "memory")
> 
> Maybe it would be better to use iob() instead of __sync() directly, so
> that it is easy to add extra steps for the CPUs that need them.  DEC
> and Loongson have custom __wbflush() implementations, and something
> similar could be added for your processor to implement the uncached
> dummy load.

I was jumping to the conclusion that the issue you're facing is related
to DMA operation.  If so, yes, we need to sync with I/O systems (namely
with DRAM in this case) at some point, prior to initiating DMA.

But getting back to your original scenario, it seems not; at least, I
failed to see a connection with DMA operations.  I wonder why and how
steps 1 through 7 would be a problem.

> Actual problem seen in the wild:
> 
> 1) dma_alloc_coherent() allocates cached memory
> 
> 2) memset() is called to clear the new pages
> 
> 3) dma_cache_wback_inv() is called to flush the zero data out to memory

At this point, write-backed data will go into a queue of 'deep' write
buffer, and will be pushed out to the internal bus system (queued).

> 4) dma_alloc_coherent() returns an uncached (kseg1) pointer to the
> freshly allocated pages
> 
> 5) Caller writes data through the kseg1 pointer

This 'write through KSEG1 segment' operation also goes into a queue of
'deep' write buffer, doesn't it?

> 6) Buffered writeback data finally gets flushed out to DRAM
> 
> 7) Part of caller's data is inexplicably zeroed out
> 
> This patch adds SYNC between steps 3 and 4, which fixed the problem.

IIUC, the problem is that write operation originating from step 5. seems
to overtake the one originating from step 3., correct?

Then we'd like to know, what is that 'Caller mentioned at step 5.', and
what kind of operation will be done by the Caller?
-- 
Shinya Kuribayashi
Renesas Electronics

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 22:50         ` Ralf Baechle
@ 2010-10-19  0:45           ` Maciej W. Rozycki
  0 siblings, 0 replies; 36+ messages in thread
From: Maciej W. Rozycki @ 2010-10-19  0:45 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Kevin Cernekee, Shinya Kuribayashi, linux-mips, linux-kernel

On Mon, 18 Oct 2010, Ralf Baechle wrote:

> I agree with you but I seem to remember having read something that suggests
> otherwise.  Oh well, maybe it's just something in the Cambridge water
> that makes my halocinate ;)

 Oh, come on!  Our water is reportedly among the best in the country.  If 
anyone is hallucinating, it must be everyone else! ;)

  Maciej

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19  0:03       ` Shinya Kuribayashi
@ 2010-10-19  0:51         ` Kevin Cernekee
  2010-10-19 13:30           ` Shinya Kuribayashi
  0 siblings, 1 reply; 36+ messages in thread
From: Kevin Cernekee @ 2010-10-19  0:51 UTC (permalink / raw)
  To: Shinya Kuribayashi
  Cc: Shinya Kuribayashi, Ralf Baechle, linux-mips, linux-kernel

On Mon, Oct 18, 2010 at 5:03 PM, Shinya Kuribayashi
<shinya.kuribayashi.px@renesas.com> wrote:
> IIUC, the problem is that write operation originating from step 5. seems
> to overtake the one originating from step 3., correct?

Correct.  This particular system makes no guarantees that data flushed
out through CACHE operations will not overtake subsequent uncached
stores.

For the case of DMA, it is possible that data that I am attempting to
send to a device (via DRAM) could still be in transit when
dma_cache_wback() returns, and may be incomplete when the DMA
operation starts.  Or that dirty cachelines that I am attempting to
"free up" for a DMA_FROM_DEVICE operation are still in transit when
dma_cache_wback_inv() returns, potentially clobbering whatever data
the peripheral is trying to write to memory.

Adding SYNC at the end of dma_cache_wback* guarantees that the write
buffers have been emptied out to DRAM and I do not have to worry
anymore about any of these cases.

> Then we'd like to know, what is that 'Caller mentioned at step 5.', and
> what kind of operation will be done by the Caller?

It is my recollection that the caller was the USB EHCI driver, and it
was allocating some sort of uncached descriptor block that contained
pointers.  Sometimes those pointers got inexplicably zeroed out, and
this is what we found to be the root cause.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 18:34     ` Kevin Cernekee
  2010-10-19  0:03       ` Shinya Kuribayashi
@ 2010-10-19  0:57       ` Maciej W. Rozycki
  2010-10-19 12:34         ` Ralf Baechle
  1 sibling, 1 reply; 36+ messages in thread
From: Maciej W. Rozycki @ 2010-10-19  0:57 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Shinya Kuribayashi, Ralf Baechle, linux-mips, linux-kernel

On Mon, 18 Oct 2010, Kevin Cernekee wrote:

> Some systems do require additional steps along those lines, e.g.
> 
> # ifdef CONFIG_SGI_IP28
> #  define fast_iob()				\
> 	__asm__ __volatile__(			\
> 		".set	push\n\t"		\
> 		".set	noreorder\n\t"		\
> 		"lw	$0,%0\n\t"		\
> 		"sync\n\t"			\
> 		"lw	$0,%0\n\t"		\
> 		".set	pop"			\
> 		: /* no output */		\
> 		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
> 		: "memory")
> 
> Maybe it would be better to use iob() instead of __sync() directly, so
> that it is easy to add extra steps for the CPUs that need them.  DEC
> and Loongson have custom __wbflush() implementations, and something
> similar could be added for your processor to implement the uncached
> dummy load.

 Ah, the old issue of the write-back barrier.  I can't comment on 
Loongson, but for DEC IIRC the write-back buffer only needs to be taken 
care of for uncached writes and they take a path separate to cached 
writes.  I'd have to dig out the details to be sure.  IIRC the most 
pathological case was the R2020 WB chip, but that was only used on systems 
that didn't do DMA (namely DECstation 3100 and 2100 boxes).

  Maciej

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-18 19:41       ` Kevin Cernekee
  2010-10-18 22:50         ` Ralf Baechle
@ 2010-10-19  8:54         ` Gleb O. Raiko
  2010-10-19  9:17           ` Ralf Baechle
  1 sibling, 1 reply; 36+ messages in thread
From: Gleb O. Raiko @ 2010-10-19  8:54 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Ralf Baechle, Shinya Kuribayashi, linux-mips, linux-kernel

On 18.10.2010 23:41, Kevin Cernekee wrote:
> I have not been able to find any official statement from MIPS that
> says that CACHE + SYNC should be used, but that seems like the most
> intuitive way to implement things on the hardware side.

Indeed, both Vol. 2 documents of the Architecture for Programmers, which
describe the instruction sets, say (though not so clearly) that sync is
needed after cache.  For example, see the documents with rev. 2.62,
p. 92 (for MIPS32 ISA) or p. 96 (for MIPS64).

Considering whether just sync is enough, I'd like to note that some boxes
may implement dma master and slave blocks that are unsynchronized. Also,
there may be write buffers somewhere in the path between cpu, memory, and
even a dma master.

BTW, we have plat_extra_sync_for_device which has appropriate name but 
invented to do things before cache flush. :-) It seems we need another 
one which will do something after.

Gleb.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19  8:54         ` Gleb O. Raiko
@ 2010-10-19  9:17           ` Ralf Baechle
  2010-10-19 10:15             ` Gleb O. Raiko
  0 siblings, 1 reply; 36+ messages in thread
From: Ralf Baechle @ 2010-10-19  9:17 UTC (permalink / raw)
  To: Gleb O. Raiko
  Cc: Kevin Cernekee, Shinya Kuribayashi, linux-mips, linux-kernel, chris

On Tue, Oct 19, 2010 at 12:54:33PM +0400, Gleb O. Raiko wrote:

> On 18.10.2010 23:41, Kevin Cernekee wrote:
> >I have not been able to find any official statement from MIPS that
> >says that CACHE + SYNC should be used, but that seems like the most
> >intuitive way to implement things on the hardware side.
> 
> Indeed, both Architecture for Programmers in Vol. 2 describing
> instruction sets not so clearly say that sync is needed after cache.
> For example, documents with rev. 2.62, p. 92 (for MIPS32 ISA) or p.
> 96 (for MIPS64).

The MIPS32 BIS v2.6 spec says on page 92:

  "The CACHE instruction and the memory transactions which are sourced by
   the CACHE instruction, such as cache refill or cache writeback, obey
   the ordering and completion rules of the SYNC instruction."

That's not as clearly spelt out as one would like but it seems to imply
that only reads/writes preceding the CACHE instruction are guaranteed
to have completed; that is, the last CACHE instruction that was executed
may still be incomplete.

> Considering whether just sync enough I'd like to note some boxes may
> implement dma master and slave blocks to be unsynchronized.
> Also,there may be write buffers somewhere in the path between cpu,
> memory, and even a dma master.
> 
> BTW, we have plat_extra_sync_for_device which has appropriate name
> but invented to do things before cache flush. :-) It seems we need
> another one which will do something after.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19  9:17           ` Ralf Baechle
@ 2010-10-19 10:15             ` Gleb O. Raiko
  0 siblings, 0 replies; 36+ messages in thread
From: Gleb O. Raiko @ 2010-10-19 10:15 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: Kevin Cernekee, Shinya Kuribayashi, linux-mips, linux-kernel, chris



On 19.10.2010 13:17, Ralf Baechle wrote:
> On Tue, Oct 19, 2010 at 12:54:33PM +0400, Gleb O. Raiko wrote:
> The MIPS32 BIS v2.6 spec says on page 92:
>
>    "The CACHE instruction and the memory transactions which are sourced by
>     the CACHE instruction, such as cache refill or cache writeback, obey
>     the ordering and completion rules of the SYNC instruction."
>
> That's not as clearly spelt out as one would like but it seems to imply
> that only reads/writes preceeding the CACHE instruction are guaranteed
> to have completed that is the last CACHE instruction that was executed
> may still be incomplete.

I meant another piece:

"For implementations which implement multiple level of caches  ... 
<speaking about inclusive caches here> ... The software must place a 
SYNC instruction after the CACHE instruction whenever there are possible 
writebacks from the inner cache to ensure that the writeback data is 
resident in the outer cache before operating on the
outer cache. ... <the rest of statement is a bogeyman story about not 
doing so>

For implementations which implement multiple level of caches without the 
inclusion property, the use of a SYNC instruction after the CACHE 
instruction is still needed whenever writeback data has to be resident 
in the next level of memory hierarchy."

It seems the last sentence should apply to inclusive caches too.

Gleb.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19  0:57       ` Maciej W. Rozycki
@ 2010-10-19 12:34         ` Ralf Baechle
  2010-10-19 20:11           ` Maciej W. Rozycki
  0 siblings, 1 reply; 36+ messages in thread
From: Ralf Baechle @ 2010-10-19 12:34 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Kevin Cernekee, Shinya Kuribayashi, linux-mips, linux-kernel

On Tue, Oct 19, 2010 at 01:57:43AM +0100, Maciej W. Rozycki wrote:

>  Ah, the old issue of the write-back barrier.  I can't comment on 
> Loongson, but for DEC IIRC the write-back buffer only needs to be taken 
> care of for uncached writes and they take a path separate to cached 
> writes.  I'd have to dig out the details to be sure.  IIRC the most 
> pathological case was the R2020 WB chip, but that was only used on systems 
> that didn't do DMA (namely DECstatation 3100 and 2100 boxes).

See R4000 User's Manual Version 2, page 326, "Uncached Loads and Stores".
Of course this can only happen on cache coherent or multiprocessor systems.
I guess none of the supported DEC MIPS systems is affected.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19  0:51         ` Kevin Cernekee
@ 2010-10-19 13:30           ` Shinya Kuribayashi
  0 siblings, 0 replies; 36+ messages in thread
From: Shinya Kuribayashi @ 2010-10-19 13:30 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: Shinya Kuribayashi, Ralf Baechle, linux-mips, linux-kernel

On 10/19/10 9:51 AM, Kevin Cernekee wrote:
> Correct.  This particular system makes no guarantees that data flushed
> out through CACHE operations will not overtake subsequent uncached
> stores.

Thanks for the clarification, understood.  So we only need to take care
of the order of out-bound data write transactions at the processor end,
and preceding uncached load is not required here.   Sorry for the noise.

   Shinya

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 8/9] MIPS: Honor L2 bypass bit
  2010-10-16 21:22 ` [PATCH resend 8/9] MIPS: Honor L2 bypass bit Kevin Cernekee
@ 2010-10-19 16:16   ` Ralf Baechle
  0 siblings, 0 replies; 36+ messages in thread
From: Ralf Baechle @ 2010-10-19 16:16 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: linux-mips, linux-kernel

On Sat, Oct 16, 2010 at 02:22:37PM -0700, Kevin Cernekee wrote:

> If CP0 CONFIG2 bit 12 (L2B) is set, the L2 cache is disabled and
> therefore Linux should not attempt to use it.
> 
> Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
> ---
>  arch/mips/mm/sc-mips.c |    5 +++++
>  1 files changed, 5 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
> index 5ab5fa8..d072b25 100644
> --- a/arch/mips/mm/sc-mips.c
> +++ b/arch/mips/mm/sc-mips.c
> @@ -79,6 +79,11 @@ static inline int __init mips_sc_probe(void)
>  		return 0;
>  
>  	config2 = read_c0_config2();
> +
> +	/* bypass bit */
> +	if (config2 & (1 << 12))
> +		return 0;

The spec I'm looking at says this bit is implementation defined so a
test for a particular CPU type would need to be added here.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19 12:34         ` Ralf Baechle
@ 2010-10-19 20:11           ` Maciej W. Rozycki
  2010-10-20  8:05             ` Gleb O. Raiko
  0 siblings, 1 reply; 36+ messages in thread
From: Maciej W. Rozycki @ 2010-10-19 20:11 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Kevin Cernekee, Shinya Kuribayashi, linux-mips, linux-kernel

On Tue, 19 Oct 2010, Ralf Baechle wrote:

> See R4000 User's Manual Version 2, page 326, "Uncached Loads and Stores".
> Of course this can only happen on cache coherent or multiprocessor systems.
> I guess none of the supported DEC MIPS systems is affected.

 Since none of the R4k DECstations is coherent or MP, I have not 
considered these implications.  The only MP DECsystem (there was no 
workstation variation), that is the 5800, was built with R3k chips in the 
first place and the chance we ever come across one, let alone support it, 
is epsilon.

 That said, R4k DECstations seem to perform aggressive write buffering in 
the chipset and to make sure a write has propagated to an MMIO register a 
SYNC and an uncached read operation are necessary.  The read may be from 
elsewhere apparently -- RAM at 0 seems just fine -- so the chipset seems 
to obey the SYNC semantics.

 I haven't investigated DMA dependencies and I think we currently have 
only one TURBOchannel device/driver (that is the DEFTA/defxx FDDI 
thingy) making use of the generic DMA API on DECstations.  It seemed to 
work correctly the last time I tried; presumably either because the API 
Does The Right Thing, or by pure luck and right timings.  Note that the 
DEC/Motorola CAMEL FDDI chipset was quite an aggressive DMA agent for its 
time, certainly capable of saturating some lower-clocked implementations 
of the I/O bus.

 Then I think the onboard devices that do third-party DMA via the IOASIC 
such as the LANCE Ethernet are too obscure/arcane to consider them here 
and get any useful results -- any inconsistencies may well be masked by 
the odd sequences used to access the respective chips.  There were some 
hardly documented chipset errata too, making the whole thing yet more 
complicated and causing some "impossible" error scenarios.

  Maciej

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code
  2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
                   ` (8 preceding siblings ...)
  2010-10-17 16:59 ` [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Florian Fainelli
@ 2010-10-20  7:19 ` Ralf Baechle
  9 siblings, 0 replies; 36+ messages in thread
From: Ralf Baechle @ 2010-10-20  7:19 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: ffainelli, mbizon, linux-mips, linux-kernel

Thanks, queued for 2.6.37.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/9] MIPS: Add BMIPS CP0 register definitions
  2010-10-16 21:22 ` [PATCH 3/9] MIPS: Add BMIPS CP0 register definitions Kevin Cernekee
@ 2010-10-20  7:23   ` Ralf Baechle
  0 siblings, 0 replies; 36+ messages in thread
From: Ralf Baechle @ 2010-10-20  7:23 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: linux-mips, linux-kernel

Thanks, queued for 2.6.37.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-19 20:11           ` Maciej W. Rozycki
@ 2010-10-20  8:05             ` Gleb O. Raiko
  2010-10-20 17:26               ` Maciej W. Rozycki
  0 siblings, 1 reply; 36+ messages in thread
From: Gleb O. Raiko @ 2010-10-20  8:05 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Ralf Baechle, Kevin Cernekee, Shinya Kuribayashi, linux-mips,
	linux-kernel

On 20.10.2010 0:11, Maciej W. Rozycki wrote:
>   That said, R4k DECstations seem to perform aggressive write buffering in
> the chipset and to make sure a write has propagated to an MMIO register a
> SYNC and an uncached read operation are necessary.

Just uncached read may be enough. R4k shall pull data from its store 
buffer on uncached read.

>   I haven't investigated DMA dependencies and I think we currently only
> have one TURBOchannel device/driver only (that is the DEFTA/defxx FDDI
> thingy) making use of the generic DMA API on DECstations.  It seemed to
> work correctly the last time I tried; presumably either because the API
> Does The Right Thing, or by pure luck and right timings.

dfx_writel issues sync after store. BTW, it seems no uncached read is
issued here (just mb() is used, which seems to do sync only), so either
that uncached read is not needed (unlikely) or data from dfx_writel
waits somewhere in the chipset to be pulled by subsequent reads or
writes.

Gleb.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-20  8:05             ` Gleb O. Raiko
@ 2010-10-20 17:26               ` Maciej W. Rozycki
  2010-10-21  8:52                 ` Gleb O. Raiko
  0 siblings, 1 reply; 36+ messages in thread
From: Maciej W. Rozycki @ 2010-10-20 17:26 UTC (permalink / raw)
  To: Gleb O. Raiko
  Cc: Ralf Baechle, Kevin Cernekee, Shinya Kuribayashi, linux-mips,
	linux-kernel

On Wed, 20 Oct 2010, Gleb O. Raiko wrote:

> >   That said, R4k DECstations seem to perform aggressive write buffering in
> > the chipset and to make sure a write has propagated to an MMIO register a
> > SYNC and an uncached read operation are necessary.
> 
> Just uncached read may be enough. R4k shall pull data from its store buffer on
> uncached read.

 I'm not sure what you mean: whether the processor will snoop the value to 
read in the store buffer or will it stall until the buffer has drained and 
issue the load on the external bus?

 I can't see the behaviour of uncached loads wrt uncached stores clearly 
documented anywhere for the R4400 processor (DEC used the SC variation, 
BTW).  There's no mention of uncached loads to have SYNC properties.  
Therefore in the context of one or more pending uncached stores I can 
assume one of the three for an uncached load:

1. If the addresses match, then the value loaded is snooped in (retrieved 
   from) the store buffer, no external cycle on the bus is seen.  This is 
   what the R2020 WB did.

2. The load bypasses the stores and therefore reaches the external bus 
   before the stores.  This is what the R3220 MB did and I believe the 
   R2020 WB defaulted to in the case of no address match.

3. The load stalls until the outstanding stores have completed and only 
   then appears on the external bus.

There's no hurt from using SYNC here and its semantics make it clear it 
enforces the case #3 above even if not otherwise guaranteed.  Otherwise I 
think the case #2 would be a reasonable default (i.e. one I'd recommend to 
a processor designer) as draining the store buffer on any uncached load 
whether needed or not is a waste of performance.

> >   I haven't investigated DMA dependencies and I think we currently only
> > have one TURBOchannel device/driver only (that is the DEFTA/defxx FDDI
> > thingy) making use of the generic DMA API on DECstations.  It seemed to
> > work correctly the last time I tried; presumably either because the API
> > Does The Right Thing, or by pure luck and right timings.
> 
> dfx_writel issues sync after store. BTW, it seems no uncached read issued here
> (just mb() is used, which seems to do sync only), so either those uncached
> read is not needed (unlikely) or data from dfx_writel wait somewhere in the
> chipset for being pulled by subsequent reads or writes.

 Ah, I could have added it myself ;) -- oddly enough even though the 
driver originated from DEC, they only used/tested it with x86 systems 
apparently, rather than the obvious choice of the Alpha that implemented a 
much, much weaker ordering model than any MIPS chip ever did.

  Maciej

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-20 17:26               ` Maciej W. Rozycki
@ 2010-10-21  8:52                 ` Gleb O. Raiko
  2010-10-24  5:12                   ` Maciej W. Rozycki
  0 siblings, 1 reply; 36+ messages in thread
From: Gleb O. Raiko @ 2010-10-21  8:52 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Ralf Baechle, Kevin Cernekee, Shinya Kuribayashi, linux-mips,
	linux-kernel

On 20.10.2010 21:26, Maciej W. Rozycki wrote:
> On Wed, 20 Oct 2010, Gleb O. Raiko wrote:
>   I'm not sure what you mean: whether the processor will snoop the value to
> read in the store buffer or will it stall until the buffer has drained and
> issue the load on the external bus?
I meant the latter.

>   I can't see the behaviour of uncached loads wrt uncached stores clearly
> documented anywhere for the R4400 processor (DEC used the SC variation,
> BTW).  There's no mention of uncached loads to have SYNC properties.
I agree the docs are unclear here. They contain an example of cached and 
uncached stores (Ralf has pointed to already), but no clear explanation 
for mix of loads and stores. Sure, it's safer to keep both sync and 
uncached load.

> Therefore in the context of one or more pending uncached stores I can
> assume one of the three for an uncached load:
>
> 1. If the addresses match, then the value loaded is snooped in (retrieved
>     from) the store buffer, no external cycle on the bus is seen.  This is
>     what the R2020 WB did.
>
> 2. The load bypasses the stores and therefore reaches the external bus
>     before the stores.  This is what the R3220 MB did and I believe the
>     R2020 WB defaulted to in the case of no address match.
>
> 3. The load stalls until the outstanding stores have completed and only
>     then appears on the external bus.
>
> There's no hurt from using SYNC here and its semantics make it clear it
> enforces the case #3 above even if not otherwise guaranteed.  Otherwise I
> think the case #2 would be a reasonable default (i.e. one I'd recommend to
> a processor designer) as draining the store buffer on any uncached load
> whether needed or not is a waste of performance.
There is no such thing as performance in the case of uncached loads.
The case #2 requires:
1. sync
2. additional operations (usually just a read) to pull data behind input 
buffers on an IO bus.

While it's ok to put that in MMIO reads/writes as you've done, it's 
almost impossible to program X server in that way, for example. This 
beast considers a frame buffer as a memory array with strong ordering. 
That's why I'd vote for the case #3. Not because it outperforms #2 in 
real life (who cares for a 0.0001% gain), but because IO devices 
require strong ordering.

Gleb.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 9/9] MIPS: Allow UserLocal on MIPS_R1 processors
  2010-10-16 21:22 ` [PATCH resend 9/9] MIPS: Allow UserLocal on MIPS_R1 processors Kevin Cernekee
@ 2010-10-21 14:32   ` Ralf Baechle
  0 siblings, 0 replies; 36+ messages in thread
From: Ralf Baechle @ 2010-10-21 14:32 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: linux-mips, linux-kernel

Thanks, also queued for 2.6.37.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/9] MIPS: Install handlers for software IRQs
  2010-10-16 21:22 ` [PATCH 4/9] MIPS: Install handlers for software IRQs Kevin Cernekee
@ 2010-10-21 14:44   ` Ralf Baechle
  2011-05-19 12:31   ` Ralf Baechle
  1 sibling, 0 replies; 36+ messages in thread
From: Ralf Baechle @ 2010-10-21 14:44 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: linux-mips, linux-kernel

Thanks, queued for 2.6.37.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH resend 5/9] MIPS: sync after cacheflush
  2010-10-21  8:52                 ` Gleb O. Raiko
@ 2010-10-24  5:12                   ` Maciej W. Rozycki
  0 siblings, 0 replies; 36+ messages in thread
From: Maciej W. Rozycki @ 2010-10-24  5:12 UTC (permalink / raw)
  To: Gleb O. Raiko
  Cc: Ralf Baechle, Kevin Cernekee, Shinya Kuribayashi, linux-mips,
	linux-kernel

On Thu, 21 Oct 2010, Gleb O. Raiko wrote:

> >   I'm not sure what you mean: whether the processor will snoop the value to
> > read in the store buffer or will it stall until the buffer has drained and
> > issue the load on the external bus?
> I meant the latter.

 OK, I hoped so, but just double-checked to be sure. :)

> > Therefore in the context of one or more pending uncached stores I can
> > assume one of the three for an uncached load:
> > 
> > 1. If the addresses match, then the value loaded is snooped in (retrieved
> >     from) the store buffer, no external cycle on the bus is seen.  This is
> >     what the R2020 WB did.
> > 
> > 2. The load bypasses the stores and therefore reaches the external bus
> >     before the stores.  This is what the R3220 MB did and I believe the
> >     R2020 WB defaulted to in the case of no address match.
> > 
> > 3. The load stalls until the outstanding stores have completed and only
> >     then appears on the external bus.
> > 
> > There's no hurt from using SYNC here and its semantics make it clear it
> > enforces the case #3 above even if not otherwise guaranteed.  Otherwise I
> > think the case #2 would be a reasonable default (i.e. one I'd recommend to
> > a processor designer) as draining the store buffer on any uncached load
> > whether needed or not is a waste of performance.
> There is no such thing like performance in case of uncached loads.
> The case #2 requires:
> 1. sync
> 2. additional operations (usually just a read) to pull data behind input
> buffers on an IO bus.

 When talking to MMIO you often don't need to force the outstanding writes 
to complete before you exit some driver's code.  They will eventually 
reach the device and do their thing in due course.

 A notable exception are some kinds of side effects that need to be 
synchronised to prevent races.  For example to avoid wasting processing 
time for handling spurious interrupts you do want to make sure a write 
that acknowledges a pending interrupt has been recorded by the handler 
reaches the respective device's register before the interrupt has been 
cleared in the interrupt controller.

 On the other hand you do not need to issue a writeback of a request for 
the device to look for more data in the outgoing DMA descriptor ring.

> While it's ok to put that in MMIO reads/writes as you've done, it's almost
> impossible to program X server in that way, for example. This beast considers
> a frame buffer as an memory array with strong ordering. That's why I'd vote
> for the case #3. Not because it outperforms #2 in the real life (who cares for
> 0.0001% gain), but because IO devices requires strong ordering.

 Ah, framebuffers.  The DEC Alpha people somehow managed to get them 
right. :)  What you say is of course true for a dumb framebuffer -- but 
who cares about dumb framebuffers these days?

 A half-decent graphics controller will provide a set of typical masked 
raster operations: STORE, AND, OR, XOR, etc. so that you don't have to 
issue RMW cycles to framebuffer's memory -- all you need are bulk writes, 
where the order does not really matter and which can be pipelined (the 
graphics controller may be able to replicate writes too, such as across 
the whole scanline -- good for the bandwidth!).

 You may still have to issue some barriers around accesses to 
framebuffer's control registers, but that's about it.  And the TGA X11 
driver undoubtedly gets these things right or otherwise nobody could have 
used it and the adapters it supports with an Alpha (as a side note: that 
graphics chip/software applies to MIPS-based DECstation systems too).  
This is all early 1990s' technology, no rocket science anymore. :)

 There's a technical report on the techniques used somewhere on the web -- 
look for "Smart Frame Buffer" (and don't forget to check its date ;) ).

 In general: don't break the CPU because you've got a broken piece of 
software -- fix the piece instead!

 I stand by my choice -- inefficiency from unnecessary (implicit) ordering 
barriers accumulates.  These operations are so slow (with latencies 
possibly counted in hundreds of CPU cycles) it really matters whether you 
need ten or just one, especially with the speeds of contemporary 
processors.

  Maciej

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/9] MIPS: Install handlers for software IRQs
  2010-10-16 21:22 ` [PATCH 4/9] MIPS: Install handlers for software IRQs Kevin Cernekee
  2010-10-21 14:44   ` Ralf Baechle
@ 2011-05-19 12:31   ` Ralf Baechle
  1 sibling, 0 replies; 36+ messages in thread
From: Ralf Baechle @ 2011-05-19 12:31 UTC (permalink / raw)
  To: Kevin Cernekee; +Cc: linux-mips, linux-kernel

On Sat, Oct 16, 2010 at 02:22:33PM -0700, Kevin Cernekee wrote:

> BMIPS4350/4380/5000 CMT/SMT all use SW INT0/INT1 for inter-thread
> signaling.

I had previously ages ago applied this patch but it somehow vaporized
so I just refreshed the patch and queued it for 2.6.41, sorry.

  Ralf

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2011-05-19 12:31 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-16 21:22 [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Kevin Cernekee
2010-10-16 21:22 ` [PATCH 2/9] MIPS: Add BMIPS processor types to Kconfig Kevin Cernekee
2010-10-17 17:01   ` Florian Fainelli
2010-10-16 21:22 ` [PATCH 3/9] MIPS: Add BMIPS CP0 register definitions Kevin Cernekee
2010-10-20  7:23   ` Ralf Baechle
2010-10-16 21:22 ` [PATCH 4/9] MIPS: Install handlers for software IRQs Kevin Cernekee
2010-10-21 14:44   ` Ralf Baechle
2011-05-19 12:31   ` Ralf Baechle
2010-10-16 21:22 ` [PATCH resend 5/9] MIPS: sync after cacheflush Kevin Cernekee
2010-10-18 13:44   ` Shinya Kuribayashi
2010-10-18 18:34     ` Kevin Cernekee
2010-10-19  0:03       ` Shinya Kuribayashi
2010-10-19  0:51         ` Kevin Cernekee
2010-10-19 13:30           ` Shinya Kuribayashi
2010-10-19  0:57       ` Maciej W. Rozycki
2010-10-19 12:34         ` Ralf Baechle
2010-10-19 20:11           ` Maciej W. Rozycki
2010-10-20  8:05             ` Gleb O. Raiko
2010-10-20 17:26               ` Maciej W. Rozycki
2010-10-21  8:52                 ` Gleb O. Raiko
2010-10-24  5:12                   ` Maciej W. Rozycki
2010-10-18 19:19     ` Ralf Baechle
2010-10-18 19:41       ` Kevin Cernekee
2010-10-18 22:50         ` Ralf Baechle
2010-10-19  0:45           ` Maciej W. Rozycki
2010-10-19  8:54         ` Gleb O. Raiko
2010-10-19  9:17           ` Ralf Baechle
2010-10-19 10:15             ` Gleb O. Raiko
2010-10-16 21:22 ` [PATCH resend 6/9] MIPS: pfn_valid() is broken on low memory HIGHMEM systems Kevin Cernekee
2010-10-16 21:22 ` [PATCH v2 resend 7/9] MIPS: Move FIXADDR_TOP into spaces.h Kevin Cernekee
2010-10-16 21:22 ` [PATCH resend 8/9] MIPS: Honor L2 bypass bit Kevin Cernekee
2010-10-19 16:16   ` Ralf Baechle
2010-10-16 21:22 ` [PATCH resend 9/9] MIPS: Allow UserLocal on MIPS_R1 processors Kevin Cernekee
2010-10-21 14:32   ` Ralf Baechle
2010-10-17 16:59 ` [PATCH 1/9] MIPS: Decouple BMIPS CPU support from bcm47xx/bcm63xx SoC code Florian Fainelli
2010-10-20  7:19 ` Ralf Baechle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).