All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
@ 2016-11-15  7:00 ` Geetha sowjanya
  0 siblings, 0 replies; 10+ messages in thread
From: Geetha sowjanya @ 2016-11-15  7:00 UTC (permalink / raw)
  To: robin.murphy-5wv7dgnIgG8, marc.zyngier-5wv7dgnIgG8,
	jason-NLaQJdtUoK4Be96aLqz0jA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	will.deacon-5wv7dgnIgG8
  Cc: Prasun.Kapoor-YGCgFSpz5w/QT0dZR+AlfA, suzuki.poulose-5wv7dgnIgG8,
	catalin.marinas-5wv7dgnIgG8, Geetha sowjanya,
	Tirumalesh Chalamarla,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	james.morse-5wv7dgnIgG8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

  This patch implements Cavium ThunderX erratum 28168.

  PCI requires stores complete in order. Due to erratum #28168
  PCI-inbound MSI-X store to the interrupt controller are delivered
  to the interrupt controller before older PCI-inbound memory stores
  are committed.
  Doing a sync on SMMU will make sure all prior data transfers are
  completed before invoking ISR.

Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Geetha sowjanya <gakula-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
---
 arch/arm64/Kconfig                  |   11 +++++++++++
 arch/arm64/Kconfig.platforms        |    1 +
 arch/arm64/include/asm/cpufeature.h |    3 ++-
 arch/arm64/kernel/cpu_errata.c      |   16 ++++++++++++++++
 drivers/iommu/arm-smmu.c            |   24 ++++++++++++++++++++++++
 drivers/irqchip/irq-gic-common.h    |    1 +
 drivers/irqchip/irq-gic-v3.c        |   19 +++++++++++++++++++
 7 files changed, 74 insertions(+), 1 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 30398db..751972c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456
 
 	  If unsure, say Y.
 
+config CAVIUM_ERRATUM_28168
+       bool "Cavium erratum 28168: Make sure DMA data transfer is done before MSIX"
+       depends on ARCH_THUNDER && ARM64
+       default y
+       help
+        Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
+        controller are delivered to the interrupt controller before older
+        PCI-inbound memory stores are committed. Doing a sync on SMMU
+        will make sure all prior data transfers are done before invoking ISR.
+
+        If unsure, say Y.
 endmenu
 
 
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index cfbdf02..2ac4ac6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -185,6 +185,7 @@ config ARCH_SPRD
 
 config ARCH_THUNDER
 	bool "Cavium Inc. Thunder SoC Family"
+	select IRQ_PREFLOW_FASTEOI
 	help
 	  This enables support for Cavium's Thunder Family of SoCs.
 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 758d74f..821fc3c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -40,8 +40,9 @@
 #define ARM64_HAS_32BIT_EL0			13
 #define ARM64_HYP_OFFSET_LOW			14
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
+#define ARM64_WORKAROUND_CAVIUM_28168		16
 
-#define ARM64_NCAPS				16
+#define ARM64_NCAPS				17
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 0150394..0841a12 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -122,6 +122,22 @@ static void cpu_enable_trap_ctr_access(void *__unused)
 		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
 	},
 #endif
+#ifdef CONFIG_CAVIUM_ERRATUM_28168
+	{
+	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
+		.desc = "Cavium erratum 28168",
+		.capability = ARM64_WORKAROUND_CAVIUM_28168,
+		MIDR_RANGE(MIDR_THUNDERX, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 1),
+	},
+	{
+	/* Cavium ThunderX, T81 pass 1.0 */
+		.desc = "Cavium erratum 28168",
+		.capability = ARM64_WORKAROUND_CAVIUM_28168,
+		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
+	},
+#endif
+
 	{
 		.desc = "Mismatched cache line size",
 		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index c841eb7..1b4555c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -570,6 +570,30 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 	}
 }
 
+/*
+ * Cavium ThunderX erratum 28168
+ *
+ * Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
+ * controller are delivered to the interrupt controller before older
+ * PCI-inbound memory stores are committed. Doing a sync on SMMU
+ * will make sure all prior data transfers are completed before
+ * invoking ISR.
+ *
+ */
+void cavium_arm_smmu_tlb_sync(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct arm_smmu_device *smmu;
+
+	smmu = fwspec_smmu(fwspec);
+	if (!smmu)
+		return;
+	__arm_smmu_tlb_sync(smmu);
+
+}
+EXPORT_SYMBOL(cavium_arm_smmu_tlb_sync);
+
+
 static void arm_smmu_tlb_sync(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 205e5fd..4e88f55 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -38,4 +38,5 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 
 void gic_set_kvm_info(const struct gic_kvm_info *info);
 
+void cavium_arm_smmu_tlb_sync(struct device *dev);
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 19d642e..723cebe 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -28,6 +28,8 @@
 #include <linux/of_irq.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
 
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-gic-common.h>
@@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
 
 #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
 
+/*
+ * Due to #28168 erratum in ThunderX,
+ * we need to make sure DMA data transfer is done before MSIX.
+ */
+static void cavium_irq_perflow_handler(struct irq_data *data)
+{
+	struct pci_dev *pdev;
+
+	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
+	if ((pdev->vendor != 0x177d) &&
+			((pdev->device & 0xA000) != 0xA000))
+		cavium_arm_smmu_tlb_sync(&pdev->dev);
+}
+
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			      irq_hw_number_t hw)
 {
@@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			return -EPERM;
 		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
+		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
+			__irq_set_preflow_handler(irq,
+						  cavium_irq_perflow_handler);
 	}
 
 	return 0;
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
@ 2016-11-15  7:00 ` Geetha sowjanya
  0 siblings, 0 replies; 10+ messages in thread
From: Geetha sowjanya @ 2016-11-15  7:00 UTC (permalink / raw)
  To: linux-arm-kernel

From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>

  This patch implements Cavium ThunderX erratum 28168.

  PCI requires stores complete in order. Due to erratum #28168
  PCI-inbound MSI-X store to the interrupt controller are delivered
  to the interrupt controller before older PCI-inbound memory stores
  are committed.
  Doing a sync on SMMU will make sure all prior data transfers are
  completed before invoking ISR.

Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
Signed-off-by: Geetha sowjanya <gakula@caviumnetworks.com>
---
 arch/arm64/Kconfig                  |   11 +++++++++++
 arch/arm64/Kconfig.platforms        |    1 +
 arch/arm64/include/asm/cpufeature.h |    3 ++-
 arch/arm64/kernel/cpu_errata.c      |   16 ++++++++++++++++
 drivers/iommu/arm-smmu.c            |   24 ++++++++++++++++++++++++
 drivers/irqchip/irq-gic-common.h    |    1 +
 drivers/irqchip/irq-gic-v3.c        |   19 +++++++++++++++++++
 7 files changed, 74 insertions(+), 1 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 30398db..751972c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456
 
 	  If unsure, say Y.
 
+config CAVIUM_ERRATUM_28168
+       bool "Cavium erratum 28168: Make sure DMA data transfer is done before MSIX"
+       depends on ARCH_THUNDER && ARM64
+       default y
+       help
+        Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
+        controller are delivered to the interrupt controller before older
+        PCI-inbound memory stores are committed. Doing a sync on SMMU
+        will make sure all prior data transfers are done before invoking ISR.
+
+        If unsure, say Y.
 endmenu
 
 
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index cfbdf02..2ac4ac6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -185,6 +185,7 @@ config ARCH_SPRD
 
 config ARCH_THUNDER
 	bool "Cavium Inc. Thunder SoC Family"
+	select IRQ_PREFLOW_FASTEOI
 	help
 	  This enables support for Cavium's Thunder Family of SoCs.
 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 758d74f..821fc3c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -40,8 +40,9 @@
 #define ARM64_HAS_32BIT_EL0			13
 #define ARM64_HYP_OFFSET_LOW			14
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
+#define ARM64_WORKAROUND_CAVIUM_28168		16
 
-#define ARM64_NCAPS				16
+#define ARM64_NCAPS				17
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 0150394..0841a12 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -122,6 +122,22 @@ static void cpu_enable_trap_ctr_access(void *__unused)
 		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
 	},
 #endif
+#ifdef CONFIG_CAVIUM_ERRATUM_28168
+	{
+	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
+		.desc = "Cavium erratum 28168",
+		.capability = ARM64_WORKAROUND_CAVIUM_28168,
+		MIDR_RANGE(MIDR_THUNDERX, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 1),
+	},
+	{
+	/* Cavium ThunderX, T81 pass 1.0 */
+		.desc = "Cavium erratum 28168",
+		.capability = ARM64_WORKAROUND_CAVIUM_28168,
+		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
+	},
+#endif
+
 	{
 		.desc = "Mismatched cache line size",
 		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index c841eb7..1b4555c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -570,6 +570,30 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 	}
 }
 
+/*
+ * Cavium ThunderX erratum 28168
+ *
+ * Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
+ * controller are delivered to the interrupt controller before older
+ * PCI-inbound memory stores are committed. Doing a sync on SMMU
+ * will make sure all prior data transfers are completed before
+ * invoking ISR.
+ *
+ */
+void cavium_arm_smmu_tlb_sync(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct arm_smmu_device *smmu;
+
+	smmu = fwspec_smmu(fwspec);
+	if (!smmu)
+		return;
+	__arm_smmu_tlb_sync(smmu);
+
+}
+EXPORT_SYMBOL(cavium_arm_smmu_tlb_sync);
+
+
 static void arm_smmu_tlb_sync(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 205e5fd..4e88f55 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -38,4 +38,5 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 
 void gic_set_kvm_info(const struct gic_kvm_info *info);
 
+void cavium_arm_smmu_tlb_sync(struct device *dev);
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 19d642e..723cebe 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -28,6 +28,8 @@
 #include <linux/of_irq.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
 
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-gic-common.h>
@@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
 
 #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
 
+/*
+ * Due to #28168 erratum in ThunderX,
+ * we need to make sure DMA data transfer is done before MSIX.
+ */
+static void cavium_irq_perflow_handler(struct irq_data *data)
+{
+	struct pci_dev *pdev;
+
+	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
+	if ((pdev->vendor != 0x177d) &&
+			((pdev->device & 0xA000) != 0xA000))
+		cavium_arm_smmu_tlb_sync(&pdev->dev);
+}
+
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			      irq_hw_number_t hw)
 {
@@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			return -EPERM;
 		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
+		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
+			__irq_set_preflow_handler(irq,
+						  cavium_irq_perflow_handler);
 	}
 
 	return 0;
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
  2016-11-15  7:00 ` Geetha sowjanya
@ 2016-11-15  9:26     ` Marc Zyngier
  -1 siblings, 0 replies; 10+ messages in thread
From: Marc Zyngier @ 2016-11-15  9:26 UTC (permalink / raw)
  To: Geetha sowjanya, robin.murphy-5wv7dgnIgG8,
	jason-NLaQJdtUoK4Be96aLqz0jA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	will.deacon-5wv7dgnIgG8
  Cc: Prasun.Kapoor-YGCgFSpz5w/QT0dZR+AlfA, suzuki.poulose-5wv7dgnIgG8,
	catalin.marinas-5wv7dgnIgG8, Tirumalesh Chalamarla,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	james.morse-5wv7dgnIgG8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

On 15/11/16 07:00, Geetha sowjanya wrote:
> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
> 
>   This patch implements Cavium ThunderX erratum 28168.
> 
>   PCI requires stores complete in order. Due to erratum #28168
>   PCI-inbound MSI-X store to the interrupt controller are delivered
>   to the interrupt controller before older PCI-inbound memory stores
>   are committed.
>   Doing a sync on SMMU will make sure all prior data transfers are
>   completed before invoking ISR.
> 
> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Geetha sowjanya <gakula-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
> ---
>  arch/arm64/Kconfig                  |   11 +++++++++++
>  arch/arm64/Kconfig.platforms        |    1 +
>  arch/arm64/include/asm/cpufeature.h |    3 ++-
>  arch/arm64/kernel/cpu_errata.c      |   16 ++++++++++++++++
>  drivers/iommu/arm-smmu.c            |   24 ++++++++++++++++++++++++
>  drivers/irqchip/irq-gic-common.h    |    1 +
>  drivers/irqchip/irq-gic-v3.c        |   19 +++++++++++++++++++
>  7 files changed, 74 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 30398db..751972c 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456
>  
>  	  If unsure, say Y.
>  
> +config CAVIUM_ERRATUM_28168
> +       bool "Cavium erratum 28168: Make sure DMA data transfer is done before MSIX"
> +       depends on ARCH_THUNDER && ARM64
> +       default y
> +       help
> +        Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
> +        controller are delivered to the interrupt controller before older
> +        PCI-inbound memory stores are committed. Doing a sync on SMMU
> +        will make sure all prior data transfers are done before invoking ISR.
> +
> +        If unsure, say Y.

Where is the entry in Documentation/arm64/silicon-errata.txt?

>  endmenu
>  
>  
> diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
> index cfbdf02..2ac4ac6 100644
> --- a/arch/arm64/Kconfig.platforms
> +++ b/arch/arm64/Kconfig.platforms
> @@ -185,6 +185,7 @@ config ARCH_SPRD
>  
>  config ARCH_THUNDER
>  	bool "Cavium Inc. Thunder SoC Family"
> +	select IRQ_PREFLOW_FASTEOI
>  	help
>  	  This enables support for Cavium's Thunder Family of SoCs.
>  
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 758d74f..821fc3c 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -40,8 +40,9 @@
>  #define ARM64_HAS_32BIT_EL0			13
>  #define ARM64_HYP_OFFSET_LOW			14
>  #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
> +#define ARM64_WORKAROUND_CAVIUM_28168		16
>  
> -#define ARM64_NCAPS				16
> +#define ARM64_NCAPS				17
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 0150394..0841a12 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -122,6 +122,22 @@ static void cpu_enable_trap_ctr_access(void *__unused)
>  		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
>  	},
>  #endif
> +#ifdef CONFIG_CAVIUM_ERRATUM_28168
> +	{
> +	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
> +		.desc = "Cavium erratum 28168",
> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
> +		MIDR_RANGE(MIDR_THUNDERX, 0x00,
> +			   (1 << MIDR_VARIANT_SHIFT) | 1),
> +	},
> +	{
> +	/* Cavium ThunderX, T81 pass 1.0 */
> +		.desc = "Cavium erratum 28168",
> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
> +		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
> +	},
> +#endif

How is that a CPU bug? Shouldn't that be keyed on the SMMU version or
the ITs version?

> +
>  	{
>  		.desc = "Mismatched cache line size",
>  		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index c841eb7..1b4555c 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -570,6 +570,30 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>  	}
>  }
>  
> +/*
> + * Cavium ThunderX erratum 28168
> + *
> + * Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
> + * controller are delivered to the interrupt controller before older
> + * PCI-inbound memory stores are committed. Doing a sync on SMMU
> + * will make sure all prior data transfers are completed before
> + * invoking ISR.
> + *
> + */
> +void cavium_arm_smmu_tlb_sync(struct device *dev)
> +{
> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> +	struct arm_smmu_device *smmu;
> +
> +	smmu = fwspec_smmu(fwspec);
> +	if (!smmu)
> +		return;
> +	__arm_smmu_tlb_sync(smmu);
> +
> +}
> +EXPORT_SYMBOL(cavium_arm_smmu_tlb_sync);

Why does this need to be exported? The only user can only be built-in.

> +
> +
>  static void arm_smmu_tlb_sync(void *cookie)
>  {
>  	struct arm_smmu_domain *smmu_domain = cookie;
> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
> index 205e5fd..4e88f55 100644
> --- a/drivers/irqchip/irq-gic-common.h
> +++ b/drivers/irqchip/irq-gic-common.h
> @@ -38,4 +38,5 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>  
>  void gic_set_kvm_info(const struct gic_kvm_info *info);
>  
> +void cavium_arm_smmu_tlb_sync(struct device *dev);

Why should this be visible to GICv2 as well? I have the ugly feeling
this should stay private to the SMMU code and that a more standard
mechanism should be used... Robin, is there anything else we could
piggy-back on?

>  #endif /* _IRQ_GIC_COMMON_H */
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index 19d642e..723cebe 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -28,6 +28,8 @@
>  #include <linux/of_irq.h>
>  #include <linux/percpu.h>
>  #include <linux/slab.h>
> +#include <linux/msi.h>
> +#include <linux/pci.h>
>  
>  #include <linux/irqchip.h>
>  #include <linux/irqchip/arm-gic-common.h>
> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>  
>  #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>  
> +/*
> + * Due to #28168 erratum in ThunderX,
> + * we need to make sure DMA data transfer is done before MSIX.
> + */
> +static void cavium_irq_perflow_handler(struct irq_data *data)
> +{
> +	struct pci_dev *pdev;
> +
> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));

What happens if this is not a PCI device?

> +	if ((pdev->vendor != 0x177d) &&
> +			((pdev->device & 0xA000) != 0xA000))
> +		cavium_arm_smmu_tlb_sync(&pdev->dev);

I've asked that before. What makes Cavium devices so special that they
are not sensitive to this bug?

> +}
> +
>  static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>  			      irq_hw_number_t hw)
>  {
> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>  			return -EPERM;
>  		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>  				    handle_fasteoi_irq, NULL, NULL);
> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
> +			__irq_set_preflow_handler(irq,
> +						  cavium_irq_perflow_handler);

What happens if SMMUv2 is not compiled in? Also, since this only affects
LPI signaling, why is this in the core GICv3 code and not in the ITS.
And more specifically, in the PCI part of the ITS, since you seem to
exclusively consider PCI?

>  	}
>  
>  	return 0;
> 

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
@ 2016-11-15  9:26     ` Marc Zyngier
  0 siblings, 0 replies; 10+ messages in thread
From: Marc Zyngier @ 2016-11-15  9:26 UTC (permalink / raw)
  To: linux-arm-kernel

On 15/11/16 07:00, Geetha sowjanya wrote:
> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
> 
>   This patch implements Cavium ThunderX erratum 28168.
> 
>   PCI requires stores complete in order. Due to erratum #28168
>   PCI-inbound MSI-X store to the interrupt controller are delivered
>   to the interrupt controller before older PCI-inbound memory stores
>   are committed.
>   Doing a sync on SMMU will make sure all prior data transfers are
>   completed before invoking ISR.
> 
> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
> Signed-off-by: Geetha sowjanya <gakula@caviumnetworks.com>
> ---
>  arch/arm64/Kconfig                  |   11 +++++++++++
>  arch/arm64/Kconfig.platforms        |    1 +
>  arch/arm64/include/asm/cpufeature.h |    3 ++-
>  arch/arm64/kernel/cpu_errata.c      |   16 ++++++++++++++++
>  drivers/iommu/arm-smmu.c            |   24 ++++++++++++++++++++++++
>  drivers/irqchip/irq-gic-common.h    |    1 +
>  drivers/irqchip/irq-gic-v3.c        |   19 +++++++++++++++++++
>  7 files changed, 74 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 30398db..751972c 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456
>  
>  	  If unsure, say Y.
>  
> +config CAVIUM_ERRATUM_28168
> +       bool "Cavium erratum 28168: Make sure DMA data transfer is done before MSIX"
> +       depends on ARCH_THUNDER && ARM64
> +       default y
> +       help
> +        Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
> +        controller are delivered to the interrupt controller before older
> +        PCI-inbound memory stores are committed. Doing a sync on SMMU
> +        will make sure all prior data transfers are done before invoking ISR.
> +
> +        If unsure, say Y.

Where is the entry in Documentation/arm64/silicon-errata.txt?

>  endmenu
>  
>  
> diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
> index cfbdf02..2ac4ac6 100644
> --- a/arch/arm64/Kconfig.platforms
> +++ b/arch/arm64/Kconfig.platforms
> @@ -185,6 +185,7 @@ config ARCH_SPRD
>  
>  config ARCH_THUNDER
>  	bool "Cavium Inc. Thunder SoC Family"
> +	select IRQ_PREFLOW_FASTEOI
>  	help
>  	  This enables support for Cavium's Thunder Family of SoCs.
>  
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 758d74f..821fc3c 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -40,8 +40,9 @@
>  #define ARM64_HAS_32BIT_EL0			13
>  #define ARM64_HYP_OFFSET_LOW			14
>  #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
> +#define ARM64_WORKAROUND_CAVIUM_28168		16
>  
> -#define ARM64_NCAPS				16
> +#define ARM64_NCAPS				17
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 0150394..0841a12 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -122,6 +122,22 @@ static void cpu_enable_trap_ctr_access(void *__unused)
>  		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
>  	},
>  #endif
> +#ifdef CONFIG_CAVIUM_ERRATUM_28168
> +	{
> +	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
> +		.desc = "Cavium erratum 28168",
> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
> +		MIDR_RANGE(MIDR_THUNDERX, 0x00,
> +			   (1 << MIDR_VARIANT_SHIFT) | 1),
> +	},
> +	{
> +	/* Cavium ThunderX, T81 pass 1.0 */
> +		.desc = "Cavium erratum 28168",
> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
> +		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
> +	},
> +#endif

How is that a CPU bug? Shouldn't that be keyed on the SMMU version or
the ITs version?

> +
>  	{
>  		.desc = "Mismatched cache line size",
>  		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index c841eb7..1b4555c 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -570,6 +570,30 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>  	}
>  }
>  
> +/*
> + * Cavium ThunderX erratum 28168
> + *
> + * Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
> + * controller are delivered to the interrupt controller before older
> + * PCI-inbound memory stores are committed. Doing a sync on SMMU
> + * will make sure all prior data transfers are completed before
> + * invoking ISR.
> + *
> + */
> +void cavium_arm_smmu_tlb_sync(struct device *dev)
> +{
> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> +	struct arm_smmu_device *smmu;
> +
> +	smmu = fwspec_smmu(fwspec);
> +	if (!smmu)
> +		return;
> +	__arm_smmu_tlb_sync(smmu);
> +
> +}
> +EXPORT_SYMBOL(cavium_arm_smmu_tlb_sync);

Why does this need to be exported? The only user can only be built-in.

> +
> +
>  static void arm_smmu_tlb_sync(void *cookie)
>  {
>  	struct arm_smmu_domain *smmu_domain = cookie;
> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
> index 205e5fd..4e88f55 100644
> --- a/drivers/irqchip/irq-gic-common.h
> +++ b/drivers/irqchip/irq-gic-common.h
> @@ -38,4 +38,5 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>  
>  void gic_set_kvm_info(const struct gic_kvm_info *info);
>  
> +void cavium_arm_smmu_tlb_sync(struct device *dev);

Why should this be visible to GICv2 as well? I have the ugly feeling
this should stay private to the SMMU code and that a more standard
mechanism should be used... Robin, is there anything else we could
piggy-back on?

>  #endif /* _IRQ_GIC_COMMON_H */
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index 19d642e..723cebe 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -28,6 +28,8 @@
>  #include <linux/of_irq.h>
>  #include <linux/percpu.h>
>  #include <linux/slab.h>
> +#include <linux/msi.h>
> +#include <linux/pci.h>
>  
>  #include <linux/irqchip.h>
>  #include <linux/irqchip/arm-gic-common.h>
> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>  
>  #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>  
> +/*
> + * Due to #28168 erratum in ThunderX,
> + * we need to make sure DMA data transfer is done before MSIX.
> + */
> +static void cavium_irq_perflow_handler(struct irq_data *data)
> +{
> +	struct pci_dev *pdev;
> +
> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));

What happens if this is not a PCI device?

> +	if ((pdev->vendor != 0x177d) &&
> +			((pdev->device & 0xA000) != 0xA000))
> +		cavium_arm_smmu_tlb_sync(&pdev->dev);

I've asked that before. What makes Cavium devices so special that they
are not sensitive to this bug?

> +}
> +
>  static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>  			      irq_hw_number_t hw)
>  {
> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>  			return -EPERM;
>  		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>  				    handle_fasteoi_irq, NULL, NULL);
> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
> +			__irq_set_preflow_handler(irq,
> +						  cavium_irq_perflow_handler);

What happens if SMMUv2 is not compiled in? Also, since this only affects
LPI signaling, why is this in the core GICv3 code and not in the ITS.
And more specifically, in the PCI part of the ITS, since you seem to
exclusively consider PCI?

>  	}
>  
>  	return 0;
> 

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
  2016-11-15  9:26     ` Marc Zyngier
@ 2016-11-15 12:36         ` Robin Murphy
  -1 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2016-11-15 12:36 UTC (permalink / raw)
  To: Marc Zyngier, Geetha sowjanya, jason-NLaQJdtUoK4Be96aLqz0jA,
	tglx-hfZtesqFncYOwBW4kG4KsQ, will.deacon-5wv7dgnIgG8
  Cc: Prasun.Kapoor-YGCgFSpz5w/QT0dZR+AlfA, suzuki.poulose-5wv7dgnIgG8,
	catalin.marinas-5wv7dgnIgG8, Tirumalesh Chalamarla,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	james.morse-5wv7dgnIgG8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

On 15/11/16 09:26, Marc Zyngier wrote:
> On 15/11/16 07:00, Geetha sowjanya wrote:
>> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
>>
>>   This patch implements Cavium ThunderX erratum 28168.
>>
>>   PCI requires stores complete in order. Due to erratum #28168
>>   PCI-inbound MSI-X store to the interrupt controller are delivered
>>   to the interrupt controller before older PCI-inbound memory stores
>>   are committed.
>>   Doing a sync on SMMU will make sure all prior data transfers are
>>   completed before invoking ISR.
>>
>> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
>> Signed-off-by: Geetha sowjanya <gakula-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
>> ---
>>  arch/arm64/Kconfig                  |   11 +++++++++++
>>  arch/arm64/Kconfig.platforms        |    1 +
>>  arch/arm64/include/asm/cpufeature.h |    3 ++-
>>  arch/arm64/kernel/cpu_errata.c      |   16 ++++++++++++++++
>>  drivers/iommu/arm-smmu.c            |   24 ++++++++++++++++++++++++
>>  drivers/irqchip/irq-gic-common.h    |    1 +
>>  drivers/irqchip/irq-gic-v3.c        |   19 +++++++++++++++++++
>>  7 files changed, 74 insertions(+), 1 deletions(-)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 30398db..751972c 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456
>>  
>>  	  If unsure, say Y.
>>  
>> +config CAVIUM_ERRATUM_28168
>> +       bool "Cavium erratum 28168: Make sure DMA data transfer is done before MSIX"
>> +       depends on ARCH_THUNDER && ARM64
>> +       default y
>> +       help
>> +        Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
>> +        controller are delivered to the interrupt controller before older
>> +        PCI-inbound memory stores are committed. Doing a sync on SMMU
>> +        will make sure all prior data transfers are done before invoking ISR.
>> +
>> +        If unsure, say Y.
> 
> Where is the entry in Documentation/arm64/silicon-errata.txt?
> 
>>  endmenu
>>  
>>  
>> diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
>> index cfbdf02..2ac4ac6 100644
>> --- a/arch/arm64/Kconfig.platforms
>> +++ b/arch/arm64/Kconfig.platforms
>> @@ -185,6 +185,7 @@ config ARCH_SPRD
>>  
>>  config ARCH_THUNDER
>>  	bool "Cavium Inc. Thunder SoC Family"
>> +	select IRQ_PREFLOW_FASTEOI
>>  	help
>>  	  This enables support for Cavium's Thunder Family of SoCs.
>>  
>> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
>> index 758d74f..821fc3c 100644
>> --- a/arch/arm64/include/asm/cpufeature.h
>> +++ b/arch/arm64/include/asm/cpufeature.h
>> @@ -40,8 +40,9 @@
>>  #define ARM64_HAS_32BIT_EL0			13
>>  #define ARM64_HYP_OFFSET_LOW			14
>>  #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
>> +#define ARM64_WORKAROUND_CAVIUM_28168		16
>>  
>> -#define ARM64_NCAPS				16
>> +#define ARM64_NCAPS				17
>>  
>>  #ifndef __ASSEMBLY__
>>  
>> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
>> index 0150394..0841a12 100644
>> --- a/arch/arm64/kernel/cpu_errata.c
>> +++ b/arch/arm64/kernel/cpu_errata.c
>> @@ -122,6 +122,22 @@ static void cpu_enable_trap_ctr_access(void *__unused)
>>  		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
>>  	},
>>  #endif
>> +#ifdef CONFIG_CAVIUM_ERRATUM_28168
>> +	{
>> +	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
>> +		.desc = "Cavium erratum 28168",
>> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
>> +		MIDR_RANGE(MIDR_THUNDERX, 0x00,
>> +			   (1 << MIDR_VARIANT_SHIFT) | 1),
>> +	},
>> +	{
>> +	/* Cavium ThunderX, T81 pass 1.0 */
>> +		.desc = "Cavium erratum 28168",
>> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
>> +		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
>> +	},
>> +#endif
> 
> How is that a CPU bug? Shouldn't that be keyed on the SMMU version or
> the ITs version?

Seconded. I assume the actual component at fault is the PCI RC, SMMU, or
interconnect in between - are there no ID registers in those parts that
will indicate a fixed version (or ECO) in future?

>> +
>>  	{
>>  		.desc = "Mismatched cache line size",
>>  		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index c841eb7..1b4555c 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -570,6 +570,30 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>>  	}
>>  }
>>  
>> +/*
>> + * Cavium ThunderX erratum 28168
>> + *
>> + * Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
>> + * controller are delivered to the interrupt controller before older
>> + * PCI-inbound memory stores are committed. Doing a sync on SMMU
>> + * will make sure all prior data transfers are completed before
>> + * invoking ISR.
>> + *
>> + */
>> +void cavium_arm_smmu_tlb_sync(struct device *dev)
>> +{
>> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
>> +	struct arm_smmu_device *smmu;
>> +
>> +	smmu = fwspec_smmu(fwspec);
>> +	if (!smmu)
>> +		return;
>> +	__arm_smmu_tlb_sync(smmu);

Further to my questions on the last posting, is TLBGSYNC alone
guaranteed to always do something, even if there are no outstanding
invalidation requests? Will this workaround still apply if
__arm_smmu_tlb_sync() were to use CBn_TLBSYNC instead?

>> +
>> +}
>> +EXPORT_SYMBOL(cavium_arm_smmu_tlb_sync);
> 
> Why does this need to be exported? The only user can only be built-in.
> 
>> +
>> +
>>  static void arm_smmu_tlb_sync(void *cookie)
>>  {
>>  	struct arm_smmu_domain *smmu_domain = cookie;
>> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
>> index 205e5fd..4e88f55 100644
>> --- a/drivers/irqchip/irq-gic-common.h
>> +++ b/drivers/irqchip/irq-gic-common.h
>> @@ -38,4 +38,5 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  
>>  void gic_set_kvm_info(const struct gic_kvm_info *info);
>>  
>> +void cavium_arm_smmu_tlb_sync(struct device *dev);
> 
> Why should this be visible to GICv2 as well? I have the ugly feeling
> this should stay private to the SMMU code and that a more standard
> mechanism should be used... Robin, is there anything else we could
> piggy-back on?

I'm not sure it's actually any less horrible, but technically, one
*could* externally force a sync like so:

struct iommu_domain *dom = iommu_get_domain_for_dev(dev);
iommu_map(dom, <some reserved IOVA>, <some safe PA>, PAGE_SIZE, IOMMU_READ;
iommu_unmap(dom, <IOVA>, PAGE_SIZE);

(needs also to be error-safe and race-safe, obviously)

although there's rather a lot of extra overhead involved, and it also
relies on the driver not doing any Intel-style lazy unmapping.

The other 'generic' way which comes to mind would be some magic domain
attribute which has a sync side-effect on read (or write), but that's
utterly vile. And I'm not even going to entertain the thought of having
the SMMU driver implement a fake irqchip stacked on top of the ITS for
the sake of keeping all the code in one place...

>>  #endif /* _IRQ_GIC_COMMON_H */
>> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
>> index 19d642e..723cebe 100644
>> --- a/drivers/irqchip/irq-gic-v3.c
>> +++ b/drivers/irqchip/irq-gic-v3.c
>> @@ -28,6 +28,8 @@
>>  #include <linux/of_irq.h>
>>  #include <linux/percpu.h>
>>  #include <linux/slab.h>
>> +#include <linux/msi.h>
>> +#include <linux/pci.h>
>>  
>>  #include <linux/irqchip.h>
>>  #include <linux/irqchip/arm-gic-common.h>
>> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>>  
>>  #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>>  
>> +/*
>> + * Due to #28168 erratum in ThunderX,
>> + * we need to make sure DMA data transfer is done before MSIX.
>> + */
>> +static void cavium_irq_perflow_handler(struct irq_data *data)

perflow?

>> +{
>> +	struct pci_dev *pdev;
>> +
>> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
> 
> What happens if this is not a PCI device?
> 
>> +	if ((pdev->vendor != 0x177d) &&
>> +			((pdev->device & 0xA000) != 0xA000))
>> +		cavium_arm_smmu_tlb_sync(&pdev->dev);
> 
> I've asked that before. What makes Cavium devices so special that they
> are not sensitive to this bug?
> 
>> +}
>> +
>>  static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>  			      irq_hw_number_t hw)
>>  {
>> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>  			return -EPERM;
>>  		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>>  				    handle_fasteoi_irq, NULL, NULL);
>> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
>> +			__irq_set_preflow_handler(irq,
>> +						  cavium_irq_perflow_handler);
> 
> What happens if SMMUv2 is not compiled in?

drivers/built-in.o: In function `cavium_irq_perflow_handler':
/work/src/linux/drivers/irqchip/irq-gic-v3.c:752: undefined reference to
`cavium_arm_smmu_tlb_sync'
make: *** [vmlinux] Error 1

Robin

> Also, since this only affects
> LPI signaling, why is this in the core GICv3 code and not in the ITS.
> And more specifically, in the PCI part of the ITS, since you seem to
> exclusively consider PCI?
> 
>>  	}
>>  
>>  	return 0;
>>
> 
> Thanks,
> 
> 	M.
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
@ 2016-11-15 12:36         ` Robin Murphy
  0 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2016-11-15 12:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 15/11/16 09:26, Marc Zyngier wrote:
> On 15/11/16 07:00, Geetha sowjanya wrote:
>> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
>>
>>   This patch implements Cavium ThunderX erratum 28168.
>>
>>   PCI requires stores complete in order. Due to erratum #28168
>>   PCI-inbound MSI-X store to the interrupt controller are delivered
>>   to the interrupt controller before older PCI-inbound memory stores
>>   are committed.
>>   Doing a sync on SMMU will make sure all prior data transfers are
>>   completed before invoking ISR.
>>
>> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
>> Signed-off-by: Geetha sowjanya <gakula@caviumnetworks.com>
>> ---
>>  arch/arm64/Kconfig                  |   11 +++++++++++
>>  arch/arm64/Kconfig.platforms        |    1 +
>>  arch/arm64/include/asm/cpufeature.h |    3 ++-
>>  arch/arm64/kernel/cpu_errata.c      |   16 ++++++++++++++++
>>  drivers/iommu/arm-smmu.c            |   24 ++++++++++++++++++++++++
>>  drivers/irqchip/irq-gic-common.h    |    1 +
>>  drivers/irqchip/irq-gic-v3.c        |   19 +++++++++++++++++++
>>  7 files changed, 74 insertions(+), 1 deletions(-)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 30398db..751972c 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456
>>  
>>  	  If unsure, say Y.
>>  
>> +config CAVIUM_ERRATUM_28168
>> +       bool "Cavium erratum 28168: Make sure DMA data transfer is done before MSIX"
>> +       depends on ARCH_THUNDER && ARM64
>> +       default y
>> +       help
>> +        Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
>> +        controller are delivered to the interrupt controller before older
>> +        PCI-inbound memory stores are committed. Doing a sync on SMMU
>> +        will make sure all prior data transfers are done before invoking ISR.
>> +
>> +        If unsure, say Y.
> 
> Where is the entry in Documentation/arm64/silicon-errata.txt?
> 
>>  endmenu
>>  
>>  
>> diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
>> index cfbdf02..2ac4ac6 100644
>> --- a/arch/arm64/Kconfig.platforms
>> +++ b/arch/arm64/Kconfig.platforms
>> @@ -185,6 +185,7 @@ config ARCH_SPRD
>>  
>>  config ARCH_THUNDER
>>  	bool "Cavium Inc. Thunder SoC Family"
>> +	select IRQ_PREFLOW_FASTEOI
>>  	help
>>  	  This enables support for Cavium's Thunder Family of SoCs.
>>  
>> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
>> index 758d74f..821fc3c 100644
>> --- a/arch/arm64/include/asm/cpufeature.h
>> +++ b/arch/arm64/include/asm/cpufeature.h
>> @@ -40,8 +40,9 @@
>>  #define ARM64_HAS_32BIT_EL0			13
>>  #define ARM64_HYP_OFFSET_LOW			14
>>  #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
>> +#define ARM64_WORKAROUND_CAVIUM_28168		16
>>  
>> -#define ARM64_NCAPS				16
>> +#define ARM64_NCAPS				17
>>  
>>  #ifndef __ASSEMBLY__
>>  
>> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
>> index 0150394..0841a12 100644
>> --- a/arch/arm64/kernel/cpu_errata.c
>> +++ b/arch/arm64/kernel/cpu_errata.c
>> @@ -122,6 +122,22 @@ static void cpu_enable_trap_ctr_access(void *__unused)
>>  		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
>>  	},
>>  #endif
>> +#ifdef CONFIG_CAVIUM_ERRATUM_28168
>> +	{
>> +	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
>> +		.desc = "Cavium erratum 28168",
>> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
>> +		MIDR_RANGE(MIDR_THUNDERX, 0x00,
>> +			   (1 << MIDR_VARIANT_SHIFT) | 1),
>> +	},
>> +	{
>> +	/* Cavium ThunderX, T81 pass 1.0 */
>> +		.desc = "Cavium erratum 28168",
>> +		.capability = ARM64_WORKAROUND_CAVIUM_28168,
>> +		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
>> +	},
>> +#endif
> 
> How is that a CPU bug? Shouldn't that be keyed on the SMMU version or
> the ITs version?

Seconded. I assume the actual component at fault is the PCI RC, SMMU, or
interconnect in between - are there no ID registers in those parts that
will indicate a fixed version (or ECO) in future?

>> +
>>  	{
>>  		.desc = "Mismatched cache line size",
>>  		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index c841eb7..1b4555c 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -570,6 +570,30 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>>  	}
>>  }
>>  
>> +/*
>> + * Cavium ThunderX erratum 28168
>> + *
>> + * Due to erratum #28168 PCI-inbound MSI-X store to the interrupt
>> + * controller are delivered to the interrupt controller before older
>> + * PCI-inbound memory stores are committed. Doing a sync on SMMU
>> + * will make sure all prior data transfers are completed before
>> + * invoking ISR.
>> + *
>> + */
>> +void cavium_arm_smmu_tlb_sync(struct device *dev)
>> +{
>> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
>> +	struct arm_smmu_device *smmu;
>> +
>> +	smmu = fwspec_smmu(fwspec);
>> +	if (!smmu)
>> +		return;
>> +	__arm_smmu_tlb_sync(smmu);

Further to my questions on the last posting, is TLBGSYNC alone
guaranteed to always do something, even if there are no outstanding
invalidation requests? Will this workaround still apply if
__arm_smmu_tlb_sync() were to use CBn_TLBSYNC instead?

>> +
>> +}
>> +EXPORT_SYMBOL(cavium_arm_smmu_tlb_sync);
> 
> Why does this need to be exported? The only user can only be built-in.
> 
>> +
>> +
>>  static void arm_smmu_tlb_sync(void *cookie)
>>  {
>>  	struct arm_smmu_domain *smmu_domain = cookie;
>> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
>> index 205e5fd..4e88f55 100644
>> --- a/drivers/irqchip/irq-gic-common.h
>> +++ b/drivers/irqchip/irq-gic-common.h
>> @@ -38,4 +38,5 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  
>>  void gic_set_kvm_info(const struct gic_kvm_info *info);
>>  
>> +void cavium_arm_smmu_tlb_sync(struct device *dev);
> 
> Why should this be visible to GICv2 as well? I have the ugly feeling
> this should stay private to the SMMU code and that a more standard
> mechanism should be used... Robin, is there anything else we could
> piggy-back on?

I'm not sure it's actually any less horrible, but technically, one
*could* externally force a sync like so:

struct iommu_domain *dom = iommu_get_domain_for_dev(dev);
iommu_map(dom, <some reserved IOVA>, <some safe PA>, PAGE_SIZE, IOMMU_READ;
iommu_unmap(dom, <IOVA>, PAGE_SIZE);

(needs also to be error-safe and race-safe, obviously)

although there's rather a lot of extra overhead involved, and it also
relies on the driver not doing any Intel-style lazy unmapping.

The other 'generic' way which comes to mind would be some magic domain
attribute which has a sync side-effect on read (or write), but that's
utterly vile. And I'm not even going to entertain the thought of having
the SMMU driver implement a fake irqchip stacked on top of the ITS for
the sake of keeping all the code in one place...

>>  #endif /* _IRQ_GIC_COMMON_H */
>> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
>> index 19d642e..723cebe 100644
>> --- a/drivers/irqchip/irq-gic-v3.c
>> +++ b/drivers/irqchip/irq-gic-v3.c
>> @@ -28,6 +28,8 @@
>>  #include <linux/of_irq.h>
>>  #include <linux/percpu.h>
>>  #include <linux/slab.h>
>> +#include <linux/msi.h>
>> +#include <linux/pci.h>
>>  
>>  #include <linux/irqchip.h>
>>  #include <linux/irqchip/arm-gic-common.h>
>> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>>  
>>  #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>>  
>> +/*
>> + * Due to #28168 erratum in ThunderX,
>> + * we need to make sure DMA data transfer is done before MSIX.
>> + */
>> +static void cavium_irq_perflow_handler(struct irq_data *data)

perflow?

>> +{
>> +	struct pci_dev *pdev;
>> +
>> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
> 
> What happens if this is not a PCI device?
> 
>> +	if ((pdev->vendor != 0x177d) &&
>> +			((pdev->device & 0xA000) != 0xA000))
>> +		cavium_arm_smmu_tlb_sync(&pdev->dev);
> 
> I've asked that before. What makes Cavium devices so special that they
> are not sensitive to this bug?
> 
>> +}
>> +
>>  static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>  			      irq_hw_number_t hw)
>>  {
>> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>  			return -EPERM;
>>  		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>>  				    handle_fasteoi_irq, NULL, NULL);
>> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
>> +			__irq_set_preflow_handler(irq,
>> +						  cavium_irq_perflow_handler);
> 
> What happens if SMMUv2 is not compiled in?

drivers/built-in.o: In function `cavium_irq_perflow_handler':
/work/src/linux/drivers/irqchip/irq-gic-v3.c:752: undefined reference to
`cavium_arm_smmu_tlb_sync'
make: *** [vmlinux] Error 1

Robin

> Also, since this only affects
> LPI signaling, why is this in the core GICv3 code and not in the ITS.
> And more specifically, in the PCI part of the ITS, since you seem to
> exclusively consider PCI?
> 
>>  	}
>>  
>>  	return 0;
>>
> 
> Thanks,
> 
> 	M.
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
  2016-11-15  9:26     ` Marc Zyngier
@ 2016-11-15 18:24         ` David Daney
  -1 siblings, 0 replies; 10+ messages in thread
From: David Daney @ 2016-11-15 18:24 UTC (permalink / raw)
  To: Marc Zyngier, Tirumalesh Chalamarla
  Cc: Prasun.Kapoor-YGCgFSpz5w/QT0dZR+AlfA,
	jason-NLaQJdtUoK4Be96aLqz0jA, suzuki.poulose-5wv7dgnIgG8,
	catalin.marinas-5wv7dgnIgG8, Geetha sowjanya,
	will.deacon-5wv7dgnIgG8,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	james.morse-5wv7dgnIgG8, tglx-hfZtesqFncYOwBW4kG4KsQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

On 11/15/2016 01:26 AM, Marc Zyngier wrote:
> On 15/11/16 07:00, Geetha sowjanya wrote:
>> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
>>
>>    This patch implements Cavium ThunderX erratum 28168.
>>
>>    PCI requires stores complete in order. Due to erratum #28168
>>    PCI-inbound MSI-X store to the interrupt controller are delivered
>>    to the interrupt controller before older PCI-inbound memory stores
>>    are committed.
>>    Doing a sync on SMMU will make sure all prior data transfers are
>>    completed before invoking ISR.
>>
>> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
>> Signed-off-by: Geetha sowjanya <gakula-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
[...]
>> --- a/drivers/irqchip/irq-gic-v3.c
>> +++ b/drivers/irqchip/irq-gic-v3.c
>> @@ -28,6 +28,8 @@
>>   #include <linux/of_irq.h>
>>   #include <linux/percpu.h>
>>   #include <linux/slab.h>
>> +#include <linux/msi.h>
>> +#include <linux/pci.h>
>>
>>   #include <linux/irqchip.h>
>>   #include <linux/irqchip/arm-gic-common.h>
>> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>>
>>   #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>>
>> +/*
>> + * Due to #28168 erratum in ThunderX,
>> + * we need to make sure DMA data transfer is done before MSIX.
>> + */
>> +static void cavium_irq_perflow_handler(struct irq_data *data)
>> +{
>> +	struct pci_dev *pdev;
>> +
>> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
>
> What happens if this is not a PCI device?
>
>> +	if ((pdev->vendor != 0x177d) &&
>> +			((pdev->device & 0xA000) != 0xA000))
>> +		cavium_arm_smmu_tlb_sync(&pdev->dev);
>
> I've asked that before. What makes Cavium devices so special that they
> are not sensitive to this bug?


This is a heuristic for devices connected to external PCIe buses as 
opposed to on-SoC devices (which don't suffer from the erratum).

In any event what would happen if we got rid of this check and ...


>
>> +}
>> +
>>   static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>   			      irq_hw_number_t hw)
>>   {
>> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>   			return -EPERM;
>>   		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>>   				    handle_fasteoi_irq, NULL, NULL);
>> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
>> +			__irq_set_preflow_handler(irq,
>> +						  cavium_irq_perflow_handler);
>

... move the registration of the preflow_handler into a 
msi_domain_ops.msi_finish() handler in irq-git-v3-its-pic-msi.c?

There we will know that it is a pci device, and can walk up the bus 
hierarchy to see if there is a Cavium PCIe root port present.  If such a 
port is found, we know we are on an external Cavium PCIe bus, and can 
register the preflow_handler without having to check the device identifiers.



> What happens if SMMUv2 is not compiled in? Also, since this only affects
> LPI signaling, why is this in the core GICv3 code and not in the ITS.
> And more specifically, in the PCI part of the ITS, since you seem to
> exclusively consider PCI?
>
>>   	}
>>
>>   	return 0;
>>
>
> Thanks,
>
> 	M.
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
@ 2016-11-15 18:24         ` David Daney
  0 siblings, 0 replies; 10+ messages in thread
From: David Daney @ 2016-11-15 18:24 UTC (permalink / raw)
  To: linux-arm-kernel

On 11/15/2016 01:26 AM, Marc Zyngier wrote:
> On 15/11/16 07:00, Geetha sowjanya wrote:
>> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
>>
>>    This patch implements Cavium ThunderX erratum 28168.
>>
>>    PCI requires stores complete in order. Due to erratum #28168
>>    PCI-inbound MSI-X store to the interrupt controller are delivered
>>    to the interrupt controller before older PCI-inbound memory stores
>>    are committed.
>>    Doing a sync on SMMU will make sure all prior data transfers are
>>    completed before invoking ISR.
>>
>> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
>> Signed-off-by: Geetha sowjanya <gakula@caviumnetworks.com>
[...]
>> --- a/drivers/irqchip/irq-gic-v3.c
>> +++ b/drivers/irqchip/irq-gic-v3.c
>> @@ -28,6 +28,8 @@
>>   #include <linux/of_irq.h>
>>   #include <linux/percpu.h>
>>   #include <linux/slab.h>
>> +#include <linux/msi.h>
>> +#include <linux/pci.h>
>>
>>   #include <linux/irqchip.h>
>>   #include <linux/irqchip/arm-gic-common.h>
>> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>>
>>   #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>>
>> +/*
>> + * Due to #28168 erratum in ThunderX,
>> + * we need to make sure DMA data transfer is done before MSIX.
>> + */
>> +static void cavium_irq_perflow_handler(struct irq_data *data)
>> +{
>> +	struct pci_dev *pdev;
>> +
>> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
>
> What happens if this is not a PCI device?
>
>> +	if ((pdev->vendor != 0x177d) &&
>> +			((pdev->device & 0xA000) != 0xA000))
>> +		cavium_arm_smmu_tlb_sync(&pdev->dev);
>
> I've asked that before. What makes Cavium devices so special that they
> are not sensitive to this bug?


This is a heuristic for devices connected to external PCIe buses as 
opposed to on-SoC devices (which don't suffer from the erratum).

In any event what would happen if we got rid of this check and ...


>
>> +}
>> +
>>   static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>   			      irq_hw_number_t hw)
>>   {
>> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>   			return -EPERM;
>>   		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>>   				    handle_fasteoi_irq, NULL, NULL);
>> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
>> +			__irq_set_preflow_handler(irq,
>> +						  cavium_irq_perflow_handler);
>

... move the registration of the preflow_handler into a 
msi_domain_ops.msi_finish() handler in irq-git-v3-its-pic-msi.c?

There we will know that it is a pci device, and can walk up the bus 
hierarchy to see if there is a Cavium PCIe root port present.  If such a 
port is found, we know we are on an external Cavium PCIe bus, and can 
register the preflow_handler without having to check the device identifiers.



> What happens if SMMUv2 is not compiled in? Also, since this only affects
> LPI signaling, why is this in the core GICv3 code and not in the ITS.
> And more specifically, in the PCI part of the ITS, since you seem to
> exclusively consider PCI?
>
>>   	}
>>
>>   	return 0;
>>
>
> Thanks,
>
> 	M.
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
  2016-11-15 18:24         ` David Daney
@ 2016-11-16  9:54             ` Marc Zyngier
  -1 siblings, 0 replies; 10+ messages in thread
From: Marc Zyngier @ 2016-11-16  9:54 UTC (permalink / raw)
  To: David Daney, Tirumalesh Chalamarla
  Cc: Prasun.Kapoor-YGCgFSpz5w/QT0dZR+AlfA,
	jason-NLaQJdtUoK4Be96aLqz0jA, suzuki.poulose-5wv7dgnIgG8,
	catalin.marinas-5wv7dgnIgG8, Geetha sowjanya,
	will.deacon-5wv7dgnIgG8,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	james.morse-5wv7dgnIgG8, tglx-hfZtesqFncYOwBW4kG4KsQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

On 15/11/16 18:24, David Daney wrote:
> On 11/15/2016 01:26 AM, Marc Zyngier wrote:
>> On 15/11/16 07:00, Geetha sowjanya wrote:
>>> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
>>>
>>>    This patch implements Cavium ThunderX erratum 28168.
>>>
>>>    PCI requires stores complete in order. Due to erratum #28168
>>>    PCI-inbound MSI-X store to the interrupt controller are delivered
>>>    to the interrupt controller before older PCI-inbound memory stores
>>>    are committed.
>>>    Doing a sync on SMMU will make sure all prior data transfers are
>>>    completed before invoking ISR.
>>>
>>> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
>>> Signed-off-by: Geetha sowjanya <gakula-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
> [...]
>>> --- a/drivers/irqchip/irq-gic-v3.c
>>> +++ b/drivers/irqchip/irq-gic-v3.c
>>> @@ -28,6 +28,8 @@
>>>   #include <linux/of_irq.h>
>>>   #include <linux/percpu.h>
>>>   #include <linux/slab.h>
>>> +#include <linux/msi.h>
>>> +#include <linux/pci.h>
>>>
>>>   #include <linux/irqchip.h>
>>>   #include <linux/irqchip/arm-gic-common.h>
>>> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>>>
>>>   #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>>>
>>> +/*
>>> + * Due to #28168 erratum in ThunderX,
>>> + * we need to make sure DMA data transfer is done before MSIX.
>>> + */
>>> +static void cavium_irq_perflow_handler(struct irq_data *data)
>>> +{
>>> +	struct pci_dev *pdev;
>>> +
>>> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
>>
>> What happens if this is not a PCI device?
>>
>>> +	if ((pdev->vendor != 0x177d) &&
>>> +			((pdev->device & 0xA000) != 0xA000))
>>> +		cavium_arm_smmu_tlb_sync(&pdev->dev);
>>
>> I've asked that before. What makes Cavium devices so special that they
>> are not sensitive to this bug?
> 
> 
> This is a heuristic for devices connected to external PCIe buses as 
> opposed to on-SoC devices (which don't suffer from the erratum).
> 
> In any event what would happen if we got rid of this check and ...
> 
> 
>>
>>> +}
>>> +
>>>   static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>>   			      irq_hw_number_t hw)
>>>   {
>>> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>>   			return -EPERM;
>>>   		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>>>   				    handle_fasteoi_irq, NULL, NULL);
>>> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
>>> +			__irq_set_preflow_handler(irq,
>>> +						  cavium_irq_perflow_handler);
>>
> 
> ... move the registration of the preflow_handler into a 
> msi_domain_ops.msi_finish() handler in irq-git-v3-its-pic-msi.c?

That's the kind of thing I was angling for. You'll have to store the
device pointer into the scratchpad (we still have plenty of space there)
so that msi_finish() can have a peek.

> There we will know that it is a pci device, and can walk up the bus 
> hierarchy to see if there is a Cavium PCIe root port present.  If such a 
> port is found, we know we are on an external Cavium PCIe bus, and can 
> register the preflow_handler without having to check the device identifiers.

Something like that (though I'm unclear why other devices wouldn't see a
root port, but that's probably me lacking some PCIe foo).

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168
@ 2016-11-16  9:54             ` Marc Zyngier
  0 siblings, 0 replies; 10+ messages in thread
From: Marc Zyngier @ 2016-11-16  9:54 UTC (permalink / raw)
  To: linux-arm-kernel

On 15/11/16 18:24, David Daney wrote:
> On 11/15/2016 01:26 AM, Marc Zyngier wrote:
>> On 15/11/16 07:00, Geetha sowjanya wrote:
>>> From: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
>>>
>>>    This patch implements Cavium ThunderX erratum 28168.
>>>
>>>    PCI requires stores complete in order. Due to erratum #28168
>>>    PCI-inbound MSI-X store to the interrupt controller are delivered
>>>    to the interrupt controller before older PCI-inbound memory stores
>>>    are committed.
>>>    Doing a sync on SMMU will make sure all prior data transfers are
>>>    completed before invoking ISR.
>>>
>>> Signed-off-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
>>> Signed-off-by: Geetha sowjanya <gakula@caviumnetworks.com>
> [...]
>>> --- a/drivers/irqchip/irq-gic-v3.c
>>> +++ b/drivers/irqchip/irq-gic-v3.c
>>> @@ -28,6 +28,8 @@
>>>   #include <linux/of_irq.h>
>>>   #include <linux/percpu.h>
>>>   #include <linux/slab.h>
>>> +#include <linux/msi.h>
>>> +#include <linux/pci.h>
>>>
>>>   #include <linux/irqchip.h>
>>>   #include <linux/irqchip/arm-gic-common.h>
>>> @@ -736,6 +738,20 @@ static inline void gic_cpu_pm_init(void) { }
>>>
>>>   #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
>>>
>>> +/*
>>> + * Due to #28168 erratum in ThunderX,
>>> + * we need to make sure DMA data transfer is done before MSIX.
>>> + */
>>> +static void cavium_irq_perflow_handler(struct irq_data *data)
>>> +{
>>> +	struct pci_dev *pdev;
>>> +
>>> +	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
>>
>> What happens if this is not a PCI device?
>>
>>> +	if ((pdev->vendor != 0x177d) &&
>>> +			((pdev->device & 0xA000) != 0xA000))
>>> +		cavium_arm_smmu_tlb_sync(&pdev->dev);
>>
>> I've asked that before. What makes Cavium devices so special that they
>> are not sensitive to this bug?
> 
> 
> This is a heuristic for devices connected to external PCIe buses as 
> opposed to on-SoC devices (which don't suffer from the erratum).
> 
> In any event what would happen if we got rid of this check and ...
> 
> 
>>
>>> +}
>>> +
>>>   static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>>   			      irq_hw_number_t hw)
>>>   {
>>> @@ -773,6 +789,9 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>>>   			return -EPERM;
>>>   		irq_domain_set_info(d, irq, hw, chip, d->host_data,
>>>   				    handle_fasteoi_irq, NULL, NULL);
>>> +		if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_28168))
>>> +			__irq_set_preflow_handler(irq,
>>> +						  cavium_irq_perflow_handler);
>>
> 
> ... move the registration of the preflow_handler into a 
> msi_domain_ops.msi_finish() handler in irq-git-v3-its-pic-msi.c?

That's the kind of thing I was angling for. You'll have to store the
device pointer into the scratchpad (we still have plenty of space there)
so that msi_finish() can have a peek.

> There we will know that it is a pci device, and can walk up the bus 
> hierarchy to see if there is a Cavium PCIe root port present.  If such a 
> port is found, we know we are on an external Cavium PCIe bus, and can 
> register the preflow_handler without having to check the device identifiers.

Something like that (though I'm unclear why other devices wouldn't see a
root port, but that's probably me lacking some PCIe foo).

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-11-16  9:54 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-15  7:00 [PATCH v2] arm64: SMMU-v2: Workaround for Cavium ThunderX erratum 28168 Geetha sowjanya
2016-11-15  7:00 ` Geetha sowjanya
     [not found] ` <1479193220-6693-1-git-send-email-gakula-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
2016-11-15  9:26   ` Marc Zyngier
2016-11-15  9:26     ` Marc Zyngier
     [not found]     ` <0414d797-7134-8192-d373-b14b26edd023-5wv7dgnIgG8@public.gmane.org>
2016-11-15 12:36       ` Robin Murphy
2016-11-15 12:36         ` Robin Murphy
2016-11-15 18:24       ` David Daney
2016-11-15 18:24         ` David Daney
     [not found]         ` <582B52D6.6060907-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
2016-11-16  9:54           ` Marc Zyngier
2016-11-16  9:54             ` Marc Zyngier

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.