All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-08-30 10:39 ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: robh, bhelgaas, colin.king, soren.brinkmann, marc.zyngier,
	michal.simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, rgummal, Bharat Kumar Gogada

The current driver prints pcie core error, for all core events.
Instead of just printing PCIe core error, now adding prints to show
individual core events occurred.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 48 +++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 3479d30..86c1834 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -85,10 +85,15 @@
 #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
 #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
 #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
-#define MSGF_MISC_SR_UR_DETECT          BIT(20)
-
-#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
-#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
+#define MSGF_MISC_SR_FATAL_AER		BIT(16)
+#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
+#define MSGF_MISC_SR_CORR_AER		BIT(18)
+#define MSGF_MISC_SR_UR_DETECT		BIT(20)
+#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
+#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
+#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
+#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
+#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
 
 #define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
 					MSGF_MISC_SR_RXMSG_OVER | \
@@ -96,9 +101,15 @@
 					MSGF_MISC_SR_MASTER_ERR | \
 					MSGF_MISC_SR_I_ADDR_ERR | \
 					MSGF_MISC_SR_E_ADDR_ERR | \
+					MSGF_MISC_SR_FATAL_AER | \
+					MSGF_MISC_SR_NON_FATAL_AER | \
+					MSGF_MISC_SR_CORR_AER | \
 					MSGF_MISC_SR_UR_DETECT | \
-					MSGF_MISC_SR_PCIE_CORE | \
-					MSGF_MISC_SR_PCIE_CORE_ERR)
+					MSGF_MISC_SR_NON_FATAL_DEV | \
+					MSGF_MISC_SR_FATAL_DEV | \
+					MSGF_MISC_SR_LINK_DOWN | \
+					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
+					MSGF_MSIC_SR_LINK_BWIDTH)
 
 /* Legacy interrupt status mask bits */
 #define MSGF_LEG_SR_INTA		BIT(0)
@@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
 		dev_err(pcie->dev,
 			"In Misc Egress address translation error\n");
 
-	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
-		dev_err(pcie->dev, "PCIe Core error\n");
+	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
+		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
+		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_CORR_AER)
+		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
+		dev_err(pcie->dev, "Unsupported request Detected\n");
+
+	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
+		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
+
+	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
+		dev_err(pcie->dev, "Fatal Error Detected\n");
+
+	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
+		dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n");
+
+	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
+		dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n");
 
 	/* Clear misc interrupt status */
 	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-08-30 10:39 ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: linux-arm-kernel

The current driver prints pcie core error, for all core events.
Instead of just printing PCIe core error, now adding prints to show
individual core events occurred.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 48 +++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 3479d30..86c1834 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -85,10 +85,15 @@
 #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
 #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
 #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
-#define MSGF_MISC_SR_UR_DETECT          BIT(20)
-
-#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
-#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
+#define MSGF_MISC_SR_FATAL_AER		BIT(16)
+#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
+#define MSGF_MISC_SR_CORR_AER		BIT(18)
+#define MSGF_MISC_SR_UR_DETECT		BIT(20)
+#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
+#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
+#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
+#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
+#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
 
 #define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
 					MSGF_MISC_SR_RXMSG_OVER | \
@@ -96,9 +101,15 @@
 					MSGF_MISC_SR_MASTER_ERR | \
 					MSGF_MISC_SR_I_ADDR_ERR | \
 					MSGF_MISC_SR_E_ADDR_ERR | \
+					MSGF_MISC_SR_FATAL_AER | \
+					MSGF_MISC_SR_NON_FATAL_AER | \
+					MSGF_MISC_SR_CORR_AER | \
 					MSGF_MISC_SR_UR_DETECT | \
-					MSGF_MISC_SR_PCIE_CORE | \
-					MSGF_MISC_SR_PCIE_CORE_ERR)
+					MSGF_MISC_SR_NON_FATAL_DEV | \
+					MSGF_MISC_SR_FATAL_DEV | \
+					MSGF_MISC_SR_LINK_DOWN | \
+					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
+					MSGF_MSIC_SR_LINK_BWIDTH)
 
 /* Legacy interrupt status mask bits */
 #define MSGF_LEG_SR_INTA		BIT(0)
@@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
 		dev_err(pcie->dev,
 			"In Misc Egress address translation error\n");
 
-	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
-		dev_err(pcie->dev, "PCIe Core error\n");
+	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
+		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
+		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_CORR_AER)
+		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
+		dev_err(pcie->dev, "Unsupported request Detected\n");
+
+	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
+		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
+
+	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
+		dev_err(pcie->dev, "Fatal Error Detected\n");
+
+	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
+		dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n");
+
+	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
+		dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n");
 
 	/* Clear misc interrupt status */
 	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 2/3] PCI: Xilinx NWL PCIe: Enabling all MSI interrupts using MSI mask.
  2016-08-30 10:39 ` Bharat Kumar Gogada
  (?)
@ 2016-08-30 10:39   ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: robh, bhelgaas, colin.king, soren.brinkmann, marc.zyngier,
	michal.simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, rgummal, Bharat Kumar Gogada

The current mask enables and allows only one MSI interrupt on each MSI line.
This change, enables all MSI interrupts, which will also support End Points
with multi MSI support.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 86c1834..d8d43e6 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -120,8 +120,8 @@
 					MSGF_LEG_SR_INTC | MSGF_LEG_SR_INTD)
 
 /* MSI interrupt status mask bits */
-#define MSGF_MSI_SR_LO_MASK		BIT(0)
-#define MSGF_MSI_SR_HI_MASK		BIT(0)
+#define MSGF_MSI_SR_LO_MASK		GENMASK(31, 0)
+#define MSGF_MSI_SR_HI_MASK		GENMASK(31, 0)
 
 #define MSII_PRESENT			BIT(0)
 #define MSII_ENABLE			BIT(0)
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 2/3] PCI: Xilinx NWL PCIe: Enabling all MSI interrupts using MSI mask.
@ 2016-08-30 10:39   ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: robh, bhelgaas, colin.king, soren.brinkmann, marc.zyngier,
	michal.simek, arnd
  Cc: Bharat Kumar Gogada, linux-pci, rgummal, linux-kernel, linux-arm-kernel

The current mask enables and allows only one MSI interrupt on each MSI line.
This change, enables all MSI interrupts, which will also support End Points
with multi MSI support.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 86c1834..d8d43e6 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -120,8 +120,8 @@
 					MSGF_LEG_SR_INTC | MSGF_LEG_SR_INTD)
 
 /* MSI interrupt status mask bits */
-#define MSGF_MSI_SR_LO_MASK		BIT(0)
-#define MSGF_MSI_SR_HI_MASK		BIT(0)
+#define MSGF_MSI_SR_LO_MASK		GENMASK(31, 0)
+#define MSGF_MSI_SR_HI_MASK		GENMASK(31, 0)
 
 #define MSII_PRESENT			BIT(0)
 #define MSII_ENABLE			BIT(0)
-- 
2.1.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 2/3] PCI: Xilinx NWL PCIe: Enabling all MSI interrupts using MSI mask.
@ 2016-08-30 10:39   ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: linux-arm-kernel

The current mask enables and allows only one MSI interrupt on each MSI line.
This change, enables all MSI interrupts, which will also support End Points
with multi MSI support.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 86c1834..d8d43e6 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -120,8 +120,8 @@
 					MSGF_LEG_SR_INTC | MSGF_LEG_SR_INTD)
 
 /* MSI interrupt status mask bits */
-#define MSGF_MSI_SR_LO_MASK		BIT(0)
-#define MSGF_MSI_SR_HI_MASK		BIT(0)
+#define MSGF_MSI_SR_LO_MASK		GENMASK(31, 0)
+#define MSGF_MSI_SR_HI_MASK		GENMASK(31, 0)
 
 #define MSII_PRESENT			BIT(0)
 #define MSII_ENABLE			BIT(0)
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-30 10:39 ` Bharat Kumar Gogada
@ 2016-08-30 10:39   ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: robh, bhelgaas, colin.king, soren.brinkmann, marc.zyngier,
	michal.simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, rgummal, Bharat Kumar Gogada

PCIe legacy interrupts start at 1, not at 0.
When testing with multi function device "error: hwirq 0x4 is too large for
dummy" error comes.
So adding one addtional interrupt when creating irq domain.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index d8d43e6..9f04411 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
 	}
 
 	pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
-							INTX_NUM,
+							INTX_NUM + 1,
 							&legacy_domain_ops,
 							pcie);
 
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-30 10:39   ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 10:39 UTC (permalink / raw)
  To: linux-arm-kernel

PCIe legacy interrupts start at 1, not at 0.
When testing with multi function device "error: hwirq 0x4 is too large for
dummy" error comes.
So adding one addtional interrupt when creating irq domain.

Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
---
 drivers/pci/host/pcie-xilinx-nwl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index d8d43e6..9f04411 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
 	}
 
 	pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
-							INTX_NUM,
+							INTX_NUM + 1,
 							&legacy_domain_ops,
 							pcie);
 
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-30 10:39   ` Bharat Kumar Gogada
@ 2016-08-30 12:17     ` Marc Zyngier
  -1 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-30 12:17 UTC (permalink / raw)
  To: Bharat Kumar Gogada, robh, bhelgaas, colin.king, soren.brinkmann,
	michal.simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, rgummal, Bharat Kumar Gogada

Hi Bharat,

On 30/08/16 11:39, Bharat Kumar Gogada wrote:
> PCIe legacy interrupts start at 1, not at 0.
> When testing with multi function device "error: hwirq 0x4 is too large for
> dummy" error comes.
> So adding one addtional interrupt when creating irq domain.
> 
> Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> ---
>  drivers/pci/host/pcie-xilinx-nwl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
> index d8d43e6..9f04411 100644
> --- a/drivers/pci/host/pcie-xilinx-nwl.c
> +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
>  	}
>  
>  	pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> -							INTX_NUM,
> +							INTX_NUM + 1,
>  							&legacy_domain_ops,
>  							pcie);

This feels like the wrong thing to do. You have INTX_NUM irqs, so the
domain allocation should reflect this. On the other hand, the way the
driver currently deals with mappings is quite broken (consistently
adding 1 to the HW interrupt).

How about something like this instead?

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 0b597d9..72b159f 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -314,8 +314,7 @@ static void nwl_pcie_leg_handler(struct irq_desc *desc)
 	while ((status = nwl_bridge_readl(pcie, MSGF_LEG_STATUS) &
 				MSGF_LEG_SR_MASKALL) != 0) {
 		for_each_set_bit(bit, &status, INTX_NUM) {
-			virq = irq_find_mapping(pcie->legacy_irq_domain,
-						bit + 1);
+			virq = irq_find_mapping(pcie->legacy_irq_domain, bit);
 			if (virq)
 				generic_handle_irq(virq);
 		}
@@ -483,7 +482,7 @@ static void nwl_pcie_free_irq_domain(struct nwl_pcie *pcie)
 	u32 irq;
 
 	for (i = 0; i < INTX_NUM; i++) {
-		irq = irq_find_mapping(pcie->legacy_irq_domain, i + 1);
+		irq = irq_find_mapping(pcie->legacy_irq_domain, i);
 		if (irq > 0)
 			irq_dispose_mapping(irq);
 	}

I may have missed a few things, but you'll get the idea.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-30 12:17     ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-30 12:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Bharat,

On 30/08/16 11:39, Bharat Kumar Gogada wrote:
> PCIe legacy interrupts start at 1, not at 0.
> When testing with multi function device "error: hwirq 0x4 is too large for
> dummy" error comes.
> So adding one addtional interrupt when creating irq domain.
> 
> Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> ---
>  drivers/pci/host/pcie-xilinx-nwl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
> index d8d43e6..9f04411 100644
> --- a/drivers/pci/host/pcie-xilinx-nwl.c
> +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
>  	}
>  
>  	pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> -							INTX_NUM,
> +							INTX_NUM + 1,
>  							&legacy_domain_ops,
>  							pcie);

This feels like the wrong thing to do. You have INTX_NUM irqs, so the
domain allocation should reflect this. On the other hand, the way the
driver currently deals with mappings is quite broken (consistently
adding 1 to the HW interrupt).

How about something like this instead?

diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 0b597d9..72b159f 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -314,8 +314,7 @@ static void nwl_pcie_leg_handler(struct irq_desc *desc)
 	while ((status = nwl_bridge_readl(pcie, MSGF_LEG_STATUS) &
 				MSGF_LEG_SR_MASKALL) != 0) {
 		for_each_set_bit(bit, &status, INTX_NUM) {
-			virq = irq_find_mapping(pcie->legacy_irq_domain,
-						bit + 1);
+			virq = irq_find_mapping(pcie->legacy_irq_domain, bit);
 			if (virq)
 				generic_handle_irq(virq);
 		}
@@ -483,7 +482,7 @@ static void nwl_pcie_free_irq_domain(struct nwl_pcie *pcie)
 	u32 irq;
 
 	for (i = 0; i < INTX_NUM; i++) {
-		irq = irq_find_mapping(pcie->legacy_irq_domain, i + 1);
+		irq = irq_find_mapping(pcie->legacy_irq_domain, i);
 		if (irq > 0)
 			irq_dispose_mapping(irq);
 	}

I may have missed a few things, but you'll get the idea.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-30 12:17     ` Marc Zyngier
  (?)
@ 2016-08-30 14:13       ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 14:13 UTC (permalink / raw)
  To: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

> Hi Bharat,
> > @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie
> *pcie)
> >     }
> >
> >     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > -                                                   INTX_NUM,
> > +                                                   INTX_NUM + 1,
> >                                                     &legacy_domain_ops,
> >                                                     pcie);
>
> This feels like the wrong thing to do. You have INTX_NUM irqs, so the domain
> allocation should reflect this. On the other hand, the way the driver currently
> deals with mappings is quite broken (consistently adding 1 to the HW interrupt).
>
Hi Marc,

Without above change I get following crash in kernel while booting.

[    2.441684] error: hwirq 0x4 is too large for dummy

[    2.441694] ------------[ cut here ]------------

[    2.441698] WARNING: at kernel/irq/irqdomain.c:344

[    2.441702] Modules linked in:

[    2.441706]

[    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8

[    2.441718] Hardware name: xlnx,zynqmp (DT)

[    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti: ffffffc071888000

[    2.441732] PC is at irq_domain_associate+0x138/0x1c0

[    2.441738] LR is at irq_domain_associate+0x138/0x1c0

In kernel/irq/irqdomain.c function irq_domain_associate

if (WARN(hwirq >= domain->hwirq_max,
                 "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
                return -EINVAL;

Here the hwirq and hwirq_max are equal to 4 without the above condition (INTX_NUM + 1) due to which crash is coming.
This is happening as the legacy interrupts are starting from 1 (INTA).

And I'm  consistently adding 1 to the HW interrupt as in nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS
if INTA interrupt is raised but my hwirq number being mapped for INTA is 0x1 so that's I'm adding 1 to obtain correct virtual irq.
Same case in nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to obtain virtual irq and free it.

Thanks & Regards,
Bharat


This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-30 14:13       ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 14:13 UTC (permalink / raw)
  To: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-pci, Ravikiran Gummaluri, linux-kernel, linux-arm-kernel

> Hi Bharat,
> > @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie
> *pcie)
> >     }
> >
> >     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > -                                                   INTX_NUM,
> > +                                                   INTX_NUM + 1,
> >                                                     &legacy_domain_ops,
> >                                                     pcie);
>
> This feels like the wrong thing to do. You have INTX_NUM irqs, so the domain
> allocation should reflect this. On the other hand, the way the driver currently
> deals with mappings is quite broken (consistently adding 1 to the HW interrupt).
>
Hi Marc,

Without above change I get following crash in kernel while booting.

[    2.441684] error: hwirq 0x4 is too large for dummy

[    2.441694] ------------[ cut here ]------------

[    2.441698] WARNING: at kernel/irq/irqdomain.c:344

[    2.441702] Modules linked in:

[    2.441706]

[    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8

[    2.441718] Hardware name: xlnx,zynqmp (DT)

[    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti: ffffffc071888000

[    2.441732] PC is at irq_domain_associate+0x138/0x1c0

[    2.441738] LR is at irq_domain_associate+0x138/0x1c0

In kernel/irq/irqdomain.c function irq_domain_associate

if (WARN(hwirq >= domain->hwirq_max,
                 "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
                return -EINVAL;

Here the hwirq and hwirq_max are equal to 4 without the above condition (INTX_NUM + 1) due to which crash is coming.
This is happening as the legacy interrupts are starting from 1 (INTA).

And I'm  consistently adding 1 to the HW interrupt as in nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS
if INTA interrupt is raised but my hwirq number being mapped for INTA is 0x1 so that's I'm adding 1 to obtain correct virtual irq.
Same case in nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to obtain virtual irq and free it.

Thanks & Regards,
Bharat


This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-30 14:13       ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-30 14:13 UTC (permalink / raw)
  To: linux-arm-kernel

> Hi Bharat,
> > @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie
> *pcie)
> >     }
> >
> >     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > -                                                   INTX_NUM,
> > +                                                   INTX_NUM + 1,
> >                                                     &legacy_domain_ops,
> >                                                     pcie);
>
> This feels like the wrong thing to do. You have INTX_NUM irqs, so the domain
> allocation should reflect this. On the other hand, the way the driver currently
> deals with mappings is quite broken (consistently adding 1 to the HW interrupt).
>
Hi Marc,

Without above change I get following crash in kernel while booting.

[    2.441684] error: hwirq 0x4 is too large for dummy

[    2.441694] ------------[ cut here ]------------

[    2.441698] WARNING: at kernel/irq/irqdomain.c:344

[    2.441702] Modules linked in:

[    2.441706]

[    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8

[    2.441718] Hardware name: xlnx,zynqmp (DT)

[    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti: ffffffc071888000

[    2.441732] PC is at irq_domain_associate+0x138/0x1c0

[    2.441738] LR is at irq_domain_associate+0x138/0x1c0

In kernel/irq/irqdomain.c function irq_domain_associate

if (WARN(hwirq >= domain->hwirq_max,
                 "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
                return -EINVAL;

Here the hwirq and hwirq_max are equal to 4 without the above condition (INTX_NUM + 1) due to which crash is coming.
This is happening as the legacy interrupts are starting from 1 (INTA).

And I'm  consistently adding 1 to the HW interrupt as in nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS
if INTA interrupt is raised but my hwirq number being mapped for INTA is 0x1 so that's I'm adding 1 to obtain correct virtual irq.
Same case in nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to obtain virtual irq and free it.

Thanks & Regards,
Bharat


This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-30 14:13       ` Bharat Kumar Gogada
  (?)
@ 2016-08-30 15:02         ` Marc Zyngier
  -1 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-30 15:02 UTC (permalink / raw)
  To: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

On 30/08/16 15:13, Bharat Kumar Gogada wrote:
>> Hi Bharat,
>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie
>> *pcie)
>>>     }
>>>
>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>> -                                                   INTX_NUM,
>>> +                                                   INTX_NUM + 1,
>>>                                                     &legacy_domain_ops,
>>>                                                     pcie);
>>
>> This feels like the wrong thing to do. You have INTX_NUM irqs, so the domain
>> allocation should reflect this. On the other hand, the way the driver currently
>> deals with mappings is quite broken (consistently adding 1 to the HW interrupt).
>>
> Hi Marc,
> 
> Without above change I get following crash in kernel while booting.
> 
> [    2.441684] error: hwirq 0x4 is too large for dummy
> 
> [    2.441694] ------------[ cut here ]------------
> 
> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> 
> [    2.441702] Modules linked in:
> 
> [    2.441706]
> 
> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> 
> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> 
> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti: ffffffc071888000
> 
> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> 
> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> 
> In kernel/irq/irqdomain.c function irq_domain_associate
> 
> if (WARN(hwirq >= domain->hwirq_max,
>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
>                 return -EINVAL;
> 
> Here the hwirq and hwirq_max are equal to 4 without the above condition (INTX_NUM + 1) due to which crash is coming.
> This is happening as the legacy interrupts are starting from 1 (INTA).

I understood that. I'm still persisting in saying that you have the
wrong fix.

Your domain should always allocate many interrupts as you have interrupt
sources. These interrupts (hwirq) should be numbered from 0 to (n-1).

> And I'm  consistently adding 1 to the HW interrupt as in
> nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
> interrupt is raised but my hwirq number being mapped for INTA is 0x1
> so that's I'm adding 1 to obtain correct virtual irq. Same case in
> nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
> obtain virtual irq and free it.

I can see that. Nonetheless, this is wrong. Can you please test the
patch I provided in my reply and report what happens?

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-30 15:02         ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-30 15:02 UTC (permalink / raw)
  To: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-pci, Ravikiran Gummaluri, linux-kernel, linux-arm-kernel

On 30/08/16 15:13, Bharat Kumar Gogada wrote:
>> Hi Bharat,
>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie
>> *pcie)
>>>     }
>>>
>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>> -                                                   INTX_NUM,
>>> +                                                   INTX_NUM + 1,
>>>                                                     &legacy_domain_ops,
>>>                                                     pcie);
>>
>> This feels like the wrong thing to do. You have INTX_NUM irqs, so the domain
>> allocation should reflect this. On the other hand, the way the driver currently
>> deals with mappings is quite broken (consistently adding 1 to the HW interrupt).
>>
> Hi Marc,
> 
> Without above change I get following crash in kernel while booting.
> 
> [    2.441684] error: hwirq 0x4 is too large for dummy
> 
> [    2.441694] ------------[ cut here ]------------
> 
> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> 
> [    2.441702] Modules linked in:
> 
> [    2.441706]
> 
> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> 
> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> 
> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti: ffffffc071888000
> 
> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> 
> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> 
> In kernel/irq/irqdomain.c function irq_domain_associate
> 
> if (WARN(hwirq >= domain->hwirq_max,
>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
>                 return -EINVAL;
> 
> Here the hwirq and hwirq_max are equal to 4 without the above condition (INTX_NUM + 1) due to which crash is coming.
> This is happening as the legacy interrupts are starting from 1 (INTA).

I understood that. I'm still persisting in saying that you have the
wrong fix.

Your domain should always allocate many interrupts as you have interrupt
sources. These interrupts (hwirq) should be numbered from 0 to (n-1).

> And I'm  consistently adding 1 to the HW interrupt as in
> nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
> interrupt is raised but my hwirq number being mapped for INTA is 0x1
> so that's I'm adding 1 to obtain correct virtual irq. Same case in
> nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
> obtain virtual irq and free it.

I can see that. Nonetheless, this is wrong. Can you please test the
patch I provided in my reply and report what happens?

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-30 15:02         ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-30 15:02 UTC (permalink / raw)
  To: linux-arm-kernel

On 30/08/16 15:13, Bharat Kumar Gogada wrote:
>> Hi Bharat,
>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie
>> *pcie)
>>>     }
>>>
>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>> -                                                   INTX_NUM,
>>> +                                                   INTX_NUM + 1,
>>>                                                     &legacy_domain_ops,
>>>                                                     pcie);
>>
>> This feels like the wrong thing to do. You have INTX_NUM irqs, so the domain
>> allocation should reflect this. On the other hand, the way the driver currently
>> deals with mappings is quite broken (consistently adding 1 to the HW interrupt).
>>
> Hi Marc,
> 
> Without above change I get following crash in kernel while booting.
> 
> [    2.441684] error: hwirq 0x4 is too large for dummy
> 
> [    2.441694] ------------[ cut here ]------------
> 
> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> 
> [    2.441702] Modules linked in:
> 
> [    2.441706]
> 
> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> 
> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> 
> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti: ffffffc071888000
> 
> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> 
> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> 
> In kernel/irq/irqdomain.c function irq_domain_associate
> 
> if (WARN(hwirq >= domain->hwirq_max,
>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
>                 return -EINVAL;
> 
> Here the hwirq and hwirq_max are equal to 4 without the above condition (INTX_NUM + 1) due to which crash is coming.
> This is happening as the legacy interrupts are starting from 1 (INTA).

I understood that. I'm still persisting in saying that you have the
wrong fix.

Your domain should always allocate many interrupts as you have interrupt
sources. These interrupts (hwirq) should be numbered from 0 to (n-1).

> And I'm  consistently adding 1 to the HW interrupt as in
> nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
> interrupt is raised but my hwirq number being mapped for INTA is 0x1
> so that's I'm adding 1 to obtain correct virtual irq. Same case in
> nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
> obtain virtual irq and free it.

I can see that. Nonetheless, this is wrong. Can you please test the
patch I provided in my reply and report what happens?

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-30 15:02         ` Marc Zyngier
  (?)
@ 2016-08-31  9:56           ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-31  9:56 UTC (permalink / raw)
  To: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

 > On 30/08/16 15:13, Bharat Kumar Gogada wrote:
> >> Hi Bharat,
> >>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>> nwl_pcie
> >> *pcie)
> >>>     }
> >>>
> >>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>> -                                                   INTX_NUM,
> >>> +                                                   INTX_NUM + 1,
> >>>                                                     &legacy_domain_ops,
> >>>                                                     pcie);
> >>
> >> This feels like the wrong thing to do. You have INTX_NUM irqs, so the
> >> domain allocation should reflect this. On the other hand, the way the
> >> driver currently deals with mappings is quite broken (consistently adding 1 to
> the HW interrupt).
> >>
> > Hi Marc,
> >
> > Without above change I get following crash in kernel while booting.
> >
> > [    2.441684] error: hwirq 0x4 is too large for dummy
> >
> > [    2.441694] ------------[ cut here ]------------
> >
> > [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >
> > [    2.441702] Modules linked in:
> >
> > [    2.441706]
> >
> > [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >
> > [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >
> > [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> ffffffc071888000
> >
> > [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >
> > [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >
> > In kernel/irq/irqdomain.c function irq_domain_associate
> >
> > if (WARN(hwirq >= domain->hwirq_max,
> >                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
> >                 return -EINVAL;
> >
> > Here the hwirq and hwirq_max are equal to 4 without the above condition
> (INTX_NUM + 1) due to which crash is coming.
> > This is happening as the legacy interrupts are starting from 1 (INTA).
> 
> I understood that. I'm still persisting in saying that you have the wrong fix.
> 
> Your domain should always allocate many interrupts as you have interrupt
> sources. These interrupts (hwirq) should be numbered from 0 to (n-1).

Agreed, but here comes the problem the hwirq for legacy interrupts will start at 0x1 to 0x4 (INTA to INTD) and 
these values are as per PCIe specification for legacy interrupts.
So these cannot be numbered from 0. So when 0x4 (INTD) for a multi-function device comes the crash occurs.
	
> 
> > And I'm  consistently adding 1 to the HW interrupt as in
> > nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
> > interrupt is raised but my hwirq number being mapped for INTA is 0x1
> > so that's I'm adding 1 to obtain correct virtual irq. Same case in
> > nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
> > obtain virtual irq and free it.
> 
> I can see that. Nonetheless, this is wrong. Can you please test the patch I
> provided in my reply and report what happens?

Can you be more specific on what is the wrong, I'm adding one since the hwirq starts from 0x1 as mentioned 
above.
I did try your suggestion with Ethernet card, but kernel hangs (it does not show any crash also, just hangs) when I do 
interface up (without bit + 1, using only bit position in handler). This is not working because in the legacy domain virq mapping
starts with hwirq 0x1, there is no mapping for 0x0 in the domain, so EP interrupt is not serviced since virq being returned is zero.

Thanks & Regards,
Bharat



 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-31  9:56           ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-31  9:56 UTC (permalink / raw)
  To: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

 > On 30/08/16 15:13, Bharat Kumar Gogada wrote:
> >> Hi Bharat,
> >>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>> nwl_pcie
> >> *pcie)
> >>>     }
> >>>
> >>>     pcie->legacy_irq_domain =3D irq_domain_add_linear(legacy_intc_nod=
e,
> >>> -                                                   INTX_NUM,
> >>> +                                                   INTX_NUM + 1,
> >>>                                                     &legacy_domain_op=
s,
> >>>                                                     pcie);
> >>
> >> This feels like the wrong thing to do. You have INTX_NUM irqs, so the
> >> domain allocation should reflect this. On the other hand, the way the
> >> driver currently deals with mappings is quite broken (consistently add=
ing 1 to
> the HW interrupt).
> >>
> > Hi Marc,
> >
> > Without above change I get following crash in kernel while booting.
> >
> > [    2.441684] error: hwirq 0x4 is too large for dummy
> >
> > [    2.441694] ------------[ cut here ]------------
> >
> > [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >
> > [    2.441702] Modules linked in:
> >
> > [    2.441706]
> >
> > [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >
> > [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >
> > [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> ffffffc071888000
> >
> > [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >
> > [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >
> > In kernel/irq/irqdomain.c function irq_domain_associate
> >
> > if (WARN(hwirq >=3D domain->hwirq_max,
> >                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq,=
 domain->name))
> >                 return -EINVAL;
> >
> > Here the hwirq and hwirq_max are equal to 4 without the above condition
> (INTX_NUM + 1) due to which crash is coming.
> > This is happening as the legacy interrupts are starting from 1 (INTA).
>=20
> I understood that. I'm still persisting in saying that you have the wrong=
 fix.
>=20
> Your domain should always allocate many interrupts as you have interrupt
> sources. These interrupts (hwirq) should be numbered from 0 to (n-1).

Agreed, but here comes the problem the hwirq for legacy interrupts will sta=
rt at 0x1 to 0x4 (INTA to INTD) and=20
these values are as per PCIe specification for legacy interrupts.
So these cannot be numbered from 0. So when 0x4 (INTD) for a multi-function=
 device comes the crash occurs.
=09
>=20
> > And I'm  consistently adding 1 to the HW interrupt as in
> > nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
> > interrupt is raised but my hwirq number being mapped for INTA is 0x1
> > so that's I'm adding 1 to obtain correct virtual irq. Same case in
> > nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
> > obtain virtual irq and free it.
>=20
> I can see that. Nonetheless, this is wrong. Can you please test the patch=
 I
> provided in my reply and report what happens?

Can you be more specific on what is the wrong, I'm adding one since the hwi=
rq starts from 0x1 as mentioned=20
above.
I did try your suggestion with Ethernet card, but kernel hangs (it does not=
 show any crash also, just hangs) when I do=20
interface up (without bit + 1, using only bit position in handler). This is=
 not working because in the legacy domain virq mapping
starts with hwirq 0x1, there is no mapping for 0x0 in the domain, so EP int=
errupt is not serviced since virq being returned is zero.

Thanks & Regards,
Bharat



=20

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-31  9:56           ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-08-31  9:56 UTC (permalink / raw)
  To: linux-arm-kernel

 > On 30/08/16 15:13, Bharat Kumar Gogada wrote:
> >> Hi Bharat,
> >>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>> nwl_pcie
> >> *pcie)
> >>>     }
> >>>
> >>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>> -                                                   INTX_NUM,
> >>> +                                                   INTX_NUM + 1,
> >>>                                                     &legacy_domain_ops,
> >>>                                                     pcie);
> >>
> >> This feels like the wrong thing to do. You have INTX_NUM irqs, so the
> >> domain allocation should reflect this. On the other hand, the way the
> >> driver currently deals with mappings is quite broken (consistently adding 1 to
> the HW interrupt).
> >>
> > Hi Marc,
> >
> > Without above change I get following crash in kernel while booting.
> >
> > [    2.441684] error: hwirq 0x4 is too large for dummy
> >
> > [    2.441694] ------------[ cut here ]------------
> >
> > [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >
> > [    2.441702] Modules linked in:
> >
> > [    2.441706]
> >
> > [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >
> > [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >
> > [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> ffffffc071888000
> >
> > [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >
> > [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >
> > In kernel/irq/irqdomain.c function irq_domain_associate
> >
> > if (WARN(hwirq >= domain->hwirq_max,
> >                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
> >                 return -EINVAL;
> >
> > Here the hwirq and hwirq_max are equal to 4 without the above condition
> (INTX_NUM + 1) due to which crash is coming.
> > This is happening as the legacy interrupts are starting from 1 (INTA).
> 
> I understood that. I'm still persisting in saying that you have the wrong fix.
> 
> Your domain should always allocate many interrupts as you have interrupt
> sources. These interrupts (hwirq) should be numbered from 0 to (n-1).

Agreed, but here comes the problem the hwirq for legacy interrupts will start at 0x1 to 0x4 (INTA to INTD) and 
these values are as per PCIe specification for legacy interrupts.
So these cannot be numbered from 0. So when 0x4 (INTD) for a multi-function device comes the crash occurs.
	
> 
> > And I'm  consistently adding 1 to the HW interrupt as in
> > nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
> > interrupt is raised but my hwirq number being mapped for INTA is 0x1
> > so that's I'm adding 1 to obtain correct virtual irq. Same case in
> > nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
> > obtain virtual irq and free it.
> 
> I can see that. Nonetheless, this is wrong. Can you please test the patch I
> provided in my reply and report what happens?

Can you be more specific on what is the wrong, I'm adding one since the hwirq starts from 0x1 as mentioned 
above.
I did try your suggestion with Ethernet card, but kernel hangs (it does not show any crash also, just hangs) when I do 
interface up (without bit + 1, using only bit position in handler). This is not working because in the legacy domain virq mapping
starts with hwirq 0x1, there is no mapping for 0x0 in the domain, so EP interrupt is not serviced since virq being returned is zero.

Thanks & Regards,
Bharat



 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-31  9:56           ` Bharat Kumar Gogada
  (?)
@ 2016-08-31 10:56             ` Marc Zyngier
  -1 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-31 10:56 UTC (permalink / raw)
  To: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

On 31/08/16 10:56, Bharat Kumar Gogada wrote:
>  > On 30/08/16 15:13, Bharat Kumar Gogada wrote:
>>>> Hi Bharat,
>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>> nwl_pcie
>>>> *pcie)
>>>>>     }
>>>>>
>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>> -                                                   INTX_NUM,
>>>>> +                                                   INTX_NUM + 1,
>>>>>                                                     &legacy_domain_ops,
>>>>>                                                     pcie);
>>>>
>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so the
>>>> domain allocation should reflect this. On the other hand, the way the
>>>> driver currently deals with mappings is quite broken (consistently adding 1 to
>> the HW interrupt).
>>>>
>>> Hi Marc,
>>>
>>> Without above change I get following crash in kernel while booting.
>>>
>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>
>>> [    2.441694] ------------[ cut here ]------------
>>>
>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>
>>> [    2.441702] Modules linked in:
>>>
>>> [    2.441706]
>>>
>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>
>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>
>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>> ffffffc071888000
>>>
>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>
>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>
>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>
>>> if (WARN(hwirq >= domain->hwirq_max,
>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
>>>                 return -EINVAL;
>>>
>>> Here the hwirq and hwirq_max are equal to 4 without the above condition
>> (INTX_NUM + 1) due to which crash is coming.
>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>
>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>
>> Your domain should always allocate many interrupts as you have interrupt
>> sources. These interrupts (hwirq) should be numbered from 0 to (n-1).
> 
> Agreed, but here comes the problem the hwirq for legacy interrupts
> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> PCIe specification for legacy interrupts. So these cannot be numbered
> from 0. So when 0x4 (INTD) for a multi-function device comes the
> crash occurs. 

So who provides this hwirq? Who calls irq_domain_associate() with hwirq
set to 4?

>>
>>> And I'm  consistently adding 1 to the HW interrupt as in
>>> nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
>>> interrupt is raised but my hwirq number being mapped for INTA is 0x1
>>> so that's I'm adding 1 to obtain correct virtual irq. Same case in
>>> nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
>>> obtain virtual irq and free it.
>>
>> I can see that. Nonetheless, this is wrong. Can you please test the patch I
>> provided in my reply and report what happens?
> 
> Can you be more specific on what is the wrong, I'm adding one since
> the hwirq starts from 0x1 as mentioned above.

hwirq should always be the value that is reported by the HW. In your
case, this ranges from 0 to 3, never 4. So if we can understand why you
get called with 4 as a hwirq, we can fix this properly, for everyone. It
is also worth noting that other drivers do not have to do this +1 dance.

> I did try your suggestion with Ethernet card, but kernel hangs (it
> does not show any crash also, just hangs) when I do interface up
> (without bit + 1, using only bit position in handler). This is not
> working because in the legacy domain virq mapping starts with hwirq
> 0x1, there is no mapping for 0x0 in the domain, so EP interrupt is
> not serviced since virq being returned is zero.

Right. So let's go back to first principles and find out *who* decides
about the hwirq starting at 1 instead of zero.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-31 10:56             ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-31 10:56 UTC (permalink / raw)
  To: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-pci, Ravikiran Gummaluri, linux-kernel, linux-arm-kernel

On 31/08/16 10:56, Bharat Kumar Gogada wrote:
>  > On 30/08/16 15:13, Bharat Kumar Gogada wrote:
>>>> Hi Bharat,
>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>> nwl_pcie
>>>> *pcie)
>>>>>     }
>>>>>
>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>> -                                                   INTX_NUM,
>>>>> +                                                   INTX_NUM + 1,
>>>>>                                                     &legacy_domain_ops,
>>>>>                                                     pcie);
>>>>
>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so the
>>>> domain allocation should reflect this. On the other hand, the way the
>>>> driver currently deals with mappings is quite broken (consistently adding 1 to
>> the HW interrupt).
>>>>
>>> Hi Marc,
>>>
>>> Without above change I get following crash in kernel while booting.
>>>
>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>
>>> [    2.441694] ------------[ cut here ]------------
>>>
>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>
>>> [    2.441702] Modules linked in:
>>>
>>> [    2.441706]
>>>
>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>
>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>
>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>> ffffffc071888000
>>>
>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>
>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>
>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>
>>> if (WARN(hwirq >= domain->hwirq_max,
>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
>>>                 return -EINVAL;
>>>
>>> Here the hwirq and hwirq_max are equal to 4 without the above condition
>> (INTX_NUM + 1) due to which crash is coming.
>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>
>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>
>> Your domain should always allocate many interrupts as you have interrupt
>> sources. These interrupts (hwirq) should be numbered from 0 to (n-1).
> 
> Agreed, but here comes the problem the hwirq for legacy interrupts
> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> PCIe specification for legacy interrupts. So these cannot be numbered
> from 0. So when 0x4 (INTD) for a multi-function device comes the
> crash occurs. 

So who provides this hwirq? Who calls irq_domain_associate() with hwirq
set to 4?

>>
>>> And I'm  consistently adding 1 to the HW interrupt as in
>>> nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
>>> interrupt is raised but my hwirq number being mapped for INTA is 0x1
>>> so that's I'm adding 1 to obtain correct virtual irq. Same case in
>>> nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
>>> obtain virtual irq and free it.
>>
>> I can see that. Nonetheless, this is wrong. Can you please test the patch I
>> provided in my reply and report what happens?
> 
> Can you be more specific on what is the wrong, I'm adding one since
> the hwirq starts from 0x1 as mentioned above.

hwirq should always be the value that is reported by the HW. In your
case, this ranges from 0 to 3, never 4. So if we can understand why you
get called with 4 as a hwirq, we can fix this properly, for everyone. It
is also worth noting that other drivers do not have to do this +1 dance.

> I did try your suggestion with Ethernet card, but kernel hangs (it
> does not show any crash also, just hangs) when I do interface up
> (without bit + 1, using only bit position in handler). This is not
> working because in the legacy domain virq mapping starts with hwirq
> 0x1, there is no mapping for 0x0 in the domain, so EP interrupt is
> not serviced since virq being returned is zero.

Right. So let's go back to first principles and find out *who* decides
about the hwirq starting at 1 instead of zero.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-08-31 10:56             ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-08-31 10:56 UTC (permalink / raw)
  To: linux-arm-kernel

On 31/08/16 10:56, Bharat Kumar Gogada wrote:
>  > On 30/08/16 15:13, Bharat Kumar Gogada wrote:
>>>> Hi Bharat,
>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>> nwl_pcie
>>>> *pcie)
>>>>>     }
>>>>>
>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>> -                                                   INTX_NUM,
>>>>> +                                                   INTX_NUM + 1,
>>>>>                                                     &legacy_domain_ops,
>>>>>                                                     pcie);
>>>>
>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so the
>>>> domain allocation should reflect this. On the other hand, the way the
>>>> driver currently deals with mappings is quite broken (consistently adding 1 to
>> the HW interrupt).
>>>>
>>> Hi Marc,
>>>
>>> Without above change I get following crash in kernel while booting.
>>>
>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>
>>> [    2.441694] ------------[ cut here ]------------
>>>
>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>
>>> [    2.441702] Modules linked in:
>>>
>>> [    2.441706]
>>>
>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>
>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>
>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>> ffffffc071888000
>>>
>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>
>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>
>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>
>>> if (WARN(hwirq >= domain->hwirq_max,
>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
>>>                 return -EINVAL;
>>>
>>> Here the hwirq and hwirq_max are equal to 4 without the above condition
>> (INTX_NUM + 1) due to which crash is coming.
>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>
>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>
>> Your domain should always allocate many interrupts as you have interrupt
>> sources. These interrupts (hwirq) should be numbered from 0 to (n-1).
> 
> Agreed, but here comes the problem the hwirq for legacy interrupts
> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> PCIe specification for legacy interrupts. So these cannot be numbered
> from 0. So when 0x4 (INTD) for a multi-function device comes the
> crash occurs. 

So who provides this hwirq? Who calls irq_domain_associate() with hwirq
set to 4?

>>
>>> And I'm  consistently adding 1 to the HW interrupt as in
>>> nwl_pcie_leg_handler I get 0th bit set from MSGF_LEG_STATUS if INTA
>>> interrupt is raised but my hwirq number being mapped for INTA is 0x1
>>> so that's I'm adding 1 to obtain correct virtual irq. Same case in
>>> nwl_pcie_free_irq_domain since hwirq starts from one I'm adding 1 to
>>> obtain virtual irq and free it.
>>
>> I can see that. Nonetheless, this is wrong. Can you please test the patch I
>> provided in my reply and report what happens?
> 
> Can you be more specific on what is the wrong, I'm adding one since
> the hwirq starts from 0x1 as mentioned above.

hwirq should always be the value that is reported by the HW. In your
case, this ranges from 0 to 3, never 4. So if we can understand why you
get called with 4 as a hwirq, we can fix this properly, for everyone. It
is also worth noting that other drivers do not have to do this +1 dance.

> I did try your suggestion with Ethernet card, but kernel hangs (it
> does not show any crash also, just hangs) when I do interface up
> (without bit + 1, using only bit position in handler). This is not
> working because in the legacy domain virq mapping starts with hwirq
> 0x1, there is no mapping for 0x0 in the domain, so EP interrupt is
> not serviced since virq being returned is zero.

Right. So let's go back to first principles and find out *who* decides
about the hwirq starting at 1 instead of zero.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-08-31 10:56             ` Marc Zyngier
  (?)
@ 2016-09-01  5:19               ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-01  5:19 UTC (permalink / raw)
  To: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

> >>>> Hi Bharat,
> >>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>>>> nwl_pcie
> >>>> *pcie)
> >>>>>     }
> >>>>>
> >>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>>>> -                                                   INTX_NUM,
> >>>>> +                                                   INTX_NUM + 1,
> >>>>>                                                     &legacy_domain_ops,
> >>>>>                                                     pcie);
> >>>>
> >>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> >>>> the domain allocation should reflect this. On the other hand, the
> >>>> way the driver currently deals with mappings is quite broken
> >>>> (consistently adding 1 to
> >> the HW interrupt).
> >>>>
> >>> Hi Marc,
> >>>
> >>> Without above change I get following crash in kernel while booting.
> >>>
> >>> [    2.441684] error: hwirq 0x4 is too large for dummy
> >>>
> >>> [    2.441694] ------------[ cut here ]------------
> >>>
> >>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >>>
> >>> [    2.441702] Modules linked in:
> >>>
> >>> [    2.441706]
> >>>
> >>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >>>
> >>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >>>
> >>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> >> ffffffc071888000
> >>>
> >>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >>>
> >>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >>>
> >>> In kernel/irq/irqdomain.c function irq_domain_associate
> >>>
> >>> if (WARN(hwirq >= domain->hwirq_max,
> >>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> >name))
> >>>                 return -EINVAL;
> >>>
> >>> Here the hwirq and hwirq_max are equal to 4 without the above
> >>> condition
> >> (INTX_NUM + 1) due to which crash is coming.
> >>> This is happening as the legacy interrupts are starting from 1 (INTA).
> >>
> >> I understood that. I'm still persisting in saying that you have the wrong fix.
> >>
> >> Your domain should always allocate many interrupts as you have
> >> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> 1).
> >
> > Agreed, but here comes the problem the hwirq for legacy interrupts
> > will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > PCIe specification for legacy interrupts. So these cannot be numbered
> > from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > occurs.
> 
> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> 4?
> 
PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
in parameter of struct of_phandle_args.
This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
to hwirq (*hwirq = fwspec->param[0]).
And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.

So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
 
Thanks & Regards,
Bharat
 

 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-01  5:19               ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-01  5:19 UTC (permalink / raw)
  To: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd
  Cc: linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

> >>>> Hi Bharat,
> >>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>>>> nwl_pcie
> >>>> *pcie)
> >>>>>     }
> >>>>>
> >>>>>     pcie->legacy_irq_domain =3D irq_domain_add_linear(legacy_intc_n=
ode,
> >>>>> -                                                   INTX_NUM,
> >>>>> +                                                   INTX_NUM + 1,
> >>>>>                                                     &legacy_domain_=
ops,
> >>>>>                                                     pcie);
> >>>>
> >>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> >>>> the domain allocation should reflect this. On the other hand, the
> >>>> way the driver currently deals with mappings is quite broken
> >>>> (consistently adding 1 to
> >> the HW interrupt).
> >>>>
> >>> Hi Marc,
> >>>
> >>> Without above change I get following crash in kernel while booting.
> >>>
> >>> [    2.441684] error: hwirq 0x4 is too large for dummy
> >>>
> >>> [    2.441694] ------------[ cut here ]------------
> >>>
> >>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >>>
> >>> [    2.441702] Modules linked in:
> >>>
> >>> [    2.441706]
> >>>
> >>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >>>
> >>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >>>
> >>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> >> ffffffc071888000
> >>>
> >>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >>>
> >>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >>>
> >>> In kernel/irq/irqdomain.c function irq_domain_associate
> >>>
> >>> if (WARN(hwirq >=3D domain->hwirq_max,
> >>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwir=
q, domain-
> >name))
> >>>                 return -EINVAL;
> >>>
> >>> Here the hwirq and hwirq_max are equal to 4 without the above
> >>> condition
> >> (INTX_NUM + 1) due to which crash is coming.
> >>> This is happening as the legacy interrupts are starting from 1 (INTA)=
.
> >>
> >> I understood that. I'm still persisting in saying that you have the wr=
ong fix.
> >>
> >> Your domain should always allocate many interrupts as you have
> >> interrupt sources. These interrupts (hwirq) should be numbered from 0 =
to (n-
> 1).
> >
> > Agreed, but here comes the problem the hwirq for legacy interrupts
> > will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > PCIe specification for legacy interrupts. So these cannot be numbered
> > from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > occurs.
>=20
> So who provides this hwirq? Who calls irq_domain_associate() with hwirq s=
et to
> 4?
>=20
PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci=
.c for every pci device.
The purpose of this function is to assign dev->irq using of_irq_parse_and_m=
ap_pci.
of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERR=
UPT_PIN from configuration space and saves it
in parameter of struct of_phandle_args.
This structure is passed to irq_create_of_mapping where it invokes irq_crea=
te_fwspec_mapping.
irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here=
 the above saved PCI_INTERRUPT_PIN value is assigned=20
to hwirq (*hwirq =3D fwspec->param[0]).
And then using this hwirq irq_create_mapping -> irq_domain_associate were i=
nvoked and mapping is created for virtual irq with this hwirq.
So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so =
hwirq starts from 0x1 to 0x4.

So the values are more generic w.r.t to protocol, that's why hwirq will ran=
ge from 0x1 to 0x4.=20
And then if you check pcie-altera.c they are doing this adding one in their=
 handler and while creating legacy domain.
=20
Thanks & Regards,
Bharat
=20

=20

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-01  5:19               ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-01  5:19 UTC (permalink / raw)
  To: linux-arm-kernel

> >>>> Hi Bharat,
> >>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>>>> nwl_pcie
> >>>> *pcie)
> >>>>>     }
> >>>>>
> >>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>>>> -                                                   INTX_NUM,
> >>>>> +                                                   INTX_NUM + 1,
> >>>>>                                                     &legacy_domain_ops,
> >>>>>                                                     pcie);
> >>>>
> >>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> >>>> the domain allocation should reflect this. On the other hand, the
> >>>> way the driver currently deals with mappings is quite broken
> >>>> (consistently adding 1 to
> >> the HW interrupt).
> >>>>
> >>> Hi Marc,
> >>>
> >>> Without above change I get following crash in kernel while booting.
> >>>
> >>> [    2.441684] error: hwirq 0x4 is too large for dummy
> >>>
> >>> [    2.441694] ------------[ cut here ]------------
> >>>
> >>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >>>
> >>> [    2.441702] Modules linked in:
> >>>
> >>> [    2.441706]
> >>>
> >>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >>>
> >>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >>>
> >>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> >> ffffffc071888000
> >>>
> >>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >>>
> >>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >>>
> >>> In kernel/irq/irqdomain.c function irq_domain_associate
> >>>
> >>> if (WARN(hwirq >= domain->hwirq_max,
> >>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> >name))
> >>>                 return -EINVAL;
> >>>
> >>> Here the hwirq and hwirq_max are equal to 4 without the above
> >>> condition
> >> (INTX_NUM + 1) due to which crash is coming.
> >>> This is happening as the legacy interrupts are starting from 1 (INTA).
> >>
> >> I understood that. I'm still persisting in saying that you have the wrong fix.
> >>
> >> Your domain should always allocate many interrupts as you have
> >> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> 1).
> >
> > Agreed, but here comes the problem the hwirq for legacy interrupts
> > will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > PCIe specification for legacy interrupts. So these cannot be numbered
> > from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > occurs.
> 
> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> 4?
> 
PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
in parameter of struct of_phandle_args.
This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
to hwirq (*hwirq = fwspec->param[0]).
And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.

So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
 
Thanks & Regards,
Bharat
 

 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-01  5:19               ` Bharat Kumar Gogada
  (?)
@ 2016-09-12 22:02                 ` Bjorn Helgaas
  -1 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-12 22:02 UTC (permalink / raw)
  To: Bharat Kumar Gogada
  Cc: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>> Hi Bharat,
> > >>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> > >>>>> nwl_pcie
> > >>>> *pcie)
> > >>>>>     }
> > >>>>>
> > >>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > >>>>> -                                                   INTX_NUM,
> > >>>>> +                                                   INTX_NUM + 1,
> > >>>>>                                                     &legacy_domain_ops,
> > >>>>>                                                     pcie);
> > >>>>
> > >>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> > >>>> the domain allocation should reflect this. On the other hand, the
> > >>>> way the driver currently deals with mappings is quite broken
> > >>>> (consistently adding 1 to
> > >> the HW interrupt).
> > >>>>
> > >>> Hi Marc,
> > >>>
> > >>> Without above change I get following crash in kernel while booting.
> > >>>
> > >>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>
> > >>> [    2.441694] ------------[ cut here ]------------
> > >>>
> > >>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>
> > >>> [    2.441702] Modules linked in:
> > >>>
> > >>> [    2.441706]
> > >>>
> > >>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>
> > >>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>
> > >>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >> ffffffc071888000
> > >>>
> > >>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>
> > >>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>
> > >>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>
> > >>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> > >name))
> > >>>                 return -EINVAL;
> > >>>
> > >>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>> condition
> > >> (INTX_NUM + 1) due to which crash is coming.
> > >>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>
> > >> I understood that. I'm still persisting in saying that you have the wrong fix.
> > >>
> > >> Your domain should always allocate many interrupts as you have
> > >> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> > 1).
> > >
> > > Agreed, but here comes the problem the hwirq for legacy interrupts
> > > will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > > PCIe specification for legacy interrupts. So these cannot be numbered
> > > from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > > occurs.
> > 
> > So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> > 4?
> > 
> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> in parameter of struct of_phandle_args.
> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> to hwirq (*hwirq = fwspec->param[0]).
> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> 
> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.

Is this resolved yet?  Marc, are you happy, or should we iterate on this
again?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-12 22:02                 ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-12 22:02 UTC (permalink / raw)
  To: Bharat Kumar Gogada
  Cc: Marc Zyngier, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>> Hi Bharat,
> > >>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> > >>>>> nwl_pcie
> > >>>> *pcie)
> > >>>>>     }
> > >>>>>
> > >>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > >>>>> -                                                   INTX_NUM,
> > >>>>> +                                                   INTX_NUM + 1,
> > >>>>>                                                     &legacy_domain_ops,
> > >>>>>                                                     pcie);
> > >>>>
> > >>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> > >>>> the domain allocation should reflect this. On the other hand, the
> > >>>> way the driver currently deals with mappings is quite broken
> > >>>> (consistently adding 1 to
> > >> the HW interrupt).
> > >>>>
> > >>> Hi Marc,
> > >>>
> > >>> Without above change I get following crash in kernel while booting.
> > >>>
> > >>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>
> > >>> [    2.441694] ------------[ cut here ]------------
> > >>>
> > >>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>
> > >>> [    2.441702] Modules linked in:
> > >>>
> > >>> [    2.441706]
> > >>>
> > >>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>
> > >>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>
> > >>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >> ffffffc071888000
> > >>>
> > >>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>
> > >>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>
> > >>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>
> > >>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> > >name))
> > >>>                 return -EINVAL;
> > >>>
> > >>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>> condition
> > >> (INTX_NUM + 1) due to which crash is coming.
> > >>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>
> > >> I understood that. I'm still persisting in saying that you have the wrong fix.
> > >>
> > >> Your domain should always allocate many interrupts as you have
> > >> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> > 1).
> > >
> > > Agreed, but here comes the problem the hwirq for legacy interrupts
> > > will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > > PCIe specification for legacy interrupts. So these cannot be numbered
> > > from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > > occurs.
> > 
> > So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> > 4?
> > 
> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> in parameter of struct of_phandle_args.
> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> to hwirq (*hwirq = fwspec->param[0]).
> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> 
> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.

Is this resolved yet?  Marc, are you happy, or should we iterate on this
again?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-12 22:02                 ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-12 22:02 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>> Hi Bharat,
> > >>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> > >>>>> nwl_pcie
> > >>>> *pcie)
> > >>>>>     }
> > >>>>>
> > >>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > >>>>> -                                                   INTX_NUM,
> > >>>>> +                                                   INTX_NUM + 1,
> > >>>>>                                                     &legacy_domain_ops,
> > >>>>>                                                     pcie);
> > >>>>
> > >>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> > >>>> the domain allocation should reflect this. On the other hand, the
> > >>>> way the driver currently deals with mappings is quite broken
> > >>>> (consistently adding 1 to
> > >> the HW interrupt).
> > >>>>
> > >>> Hi Marc,
> > >>>
> > >>> Without above change I get following crash in kernel while booting.
> > >>>
> > >>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>
> > >>> [    2.441694] ------------[ cut here ]------------
> > >>>
> > >>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>
> > >>> [    2.441702] Modules linked in:
> > >>>
> > >>> [    2.441706]
> > >>>
> > >>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>
> > >>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>
> > >>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >> ffffffc071888000
> > >>>
> > >>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>
> > >>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>
> > >>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>
> > >>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> > >name))
> > >>>                 return -EINVAL;
> > >>>
> > >>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>> condition
> > >> (INTX_NUM + 1) due to which crash is coming.
> > >>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>
> > >> I understood that. I'm still persisting in saying that you have the wrong fix.
> > >>
> > >> Your domain should always allocate many interrupts as you have
> > >> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> > 1).
> > >
> > > Agreed, but here comes the problem the hwirq for legacy interrupts
> > > will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > > PCIe specification for legacy interrupts. So these cannot be numbered
> > > from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > > occurs.
> > 
> > So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> > 4?
> > 
> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> in parameter of struct of_phandle_args.
> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> to hwirq (*hwirq = fwspec->param[0]).
> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> 
> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.

Is this resolved yet?  Marc, are you happy, or should we iterate on this
again?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-12 22:02                 ` Bjorn Helgaas
  (?)
@ 2016-09-13  7:41                   ` Marc Zyngier
  -1 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-09-13  7:41 UTC (permalink / raw)
  To: Bjorn Helgaas, Bharat Kumar Gogada
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, Michal Simek, arnd,
	linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

On 12/09/16 23:02, Bjorn Helgaas wrote:
> On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
>>>>>>> Hi Bharat,
>>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>>>>> nwl_pcie
>>>>>>> *pcie)
>>>>>>>>     }
>>>>>>>>
>>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>>>>> -                                                   INTX_NUM,
>>>>>>>> +                                                   INTX_NUM + 1,
>>>>>>>>                                                     &legacy_domain_ops,
>>>>>>>>                                                     pcie);
>>>>>>>
>>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
>>>>>>> the domain allocation should reflect this. On the other hand, the
>>>>>>> way the driver currently deals with mappings is quite broken
>>>>>>> (consistently adding 1 to
>>>>> the HW interrupt).
>>>>>>>
>>>>>> Hi Marc,
>>>>>>
>>>>>> Without above change I get following crash in kernel while booting.
>>>>>>
>>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>>>>
>>>>>> [    2.441694] ------------[ cut here ]------------
>>>>>>
>>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>>>>
>>>>>> [    2.441702] Modules linked in:
>>>>>>
>>>>>> [    2.441706]
>>>>>>
>>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>>>>
>>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>>>>
>>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>>>>> ffffffc071888000
>>>>>>
>>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>>>>
>>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>>>>
>>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>>>>
>>>>>> if (WARN(hwirq >= domain->hwirq_max,
>>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
>>>> name))
>>>>>>                 return -EINVAL;
>>>>>>
>>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
>>>>>> condition
>>>>> (INTX_NUM + 1) due to which crash is coming.
>>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>>>>
>>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>>>>
>>>>> Your domain should always allocate many interrupts as you have
>>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
>>> 1).
>>>>
>>>> Agreed, but here comes the problem the hwirq for legacy interrupts
>>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
>>>> PCIe specification for legacy interrupts. So these cannot be numbered
>>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
>>>> occurs.
>>>
>>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
>>> 4?
>>>
>> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
>> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
>> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
>> in parameter of struct of_phandle_args.
>> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
>> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
>> to hwirq (*hwirq = fwspec->param[0]).
>> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
>> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
>>
>> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
>> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> 
> Is this resolved yet?  Marc, are you happy, or should we iterate on this
> again?


Ah, sorry to have dropped the ball on this patch.

I guess that given that the infrastructure imposes the hwirq range on
the host drivers, Bharat's approach is the only way (and a number of
other host drivers are already slightly broken). I'll try and have a
look at solving this at the generic level. In the meantime:

Acked-by: Marc Zyngier <marc.zyngier@arm.com>

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13  7:41                   ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-09-13  7:41 UTC (permalink / raw)
  To: Bjorn Helgaas, Bharat Kumar Gogada
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, Michal Simek, arnd,
	linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri

On 12/09/16 23:02, Bjorn Helgaas wrote:
> On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
>>>>>>> Hi Bharat,
>>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>>>>> nwl_pcie
>>>>>>> *pcie)
>>>>>>>>     }
>>>>>>>>
>>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>>>>> -                                                   INTX_NUM,
>>>>>>>> +                                                   INTX_NUM + 1,
>>>>>>>>                                                     &legacy_domain_ops,
>>>>>>>>                                                     pcie);
>>>>>>>
>>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
>>>>>>> the domain allocation should reflect this. On the other hand, the
>>>>>>> way the driver currently deals with mappings is quite broken
>>>>>>> (consistently adding 1 to
>>>>> the HW interrupt).
>>>>>>>
>>>>>> Hi Marc,
>>>>>>
>>>>>> Without above change I get following crash in kernel while booting.
>>>>>>
>>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>>>>
>>>>>> [    2.441694] ------------[ cut here ]------------
>>>>>>
>>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>>>>
>>>>>> [    2.441702] Modules linked in:
>>>>>>
>>>>>> [    2.441706]
>>>>>>
>>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>>>>
>>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>>>>
>>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>>>>> ffffffc071888000
>>>>>>
>>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>>>>
>>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>>>>
>>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>>>>
>>>>>> if (WARN(hwirq >= domain->hwirq_max,
>>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
>>>> name))
>>>>>>                 return -EINVAL;
>>>>>>
>>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
>>>>>> condition
>>>>> (INTX_NUM + 1) due to which crash is coming.
>>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>>>>
>>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>>>>
>>>>> Your domain should always allocate many interrupts as you have
>>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
>>> 1).
>>>>
>>>> Agreed, but here comes the problem the hwirq for legacy interrupts
>>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
>>>> PCIe specification for legacy interrupts. So these cannot be numbered
>>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
>>>> occurs.
>>>
>>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
>>> 4?
>>>
>> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
>> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
>> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
>> in parameter of struct of_phandle_args.
>> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
>> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
>> to hwirq (*hwirq = fwspec->param[0]).
>> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
>> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
>>
>> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
>> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> 
> Is this resolved yet?  Marc, are you happy, or should we iterate on this
> again?


Ah, sorry to have dropped the ball on this patch.

I guess that given that the infrastructure imposes the hwirq range on
the host drivers, Bharat's approach is the only way (and a number of
other host drivers are already slightly broken). I'll try and have a
look at solving this at the generic level. In the meantime:

Acked-by: Marc Zyngier <marc.zyngier@arm.com>

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13  7:41                   ` Marc Zyngier
  0 siblings, 0 replies; 58+ messages in thread
From: Marc Zyngier @ 2016-09-13  7:41 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/09/16 23:02, Bjorn Helgaas wrote:
> On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
>>>>>>> Hi Bharat,
>>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>>>>> nwl_pcie
>>>>>>> *pcie)
>>>>>>>>     }
>>>>>>>>
>>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>>>>> -                                                   INTX_NUM,
>>>>>>>> +                                                   INTX_NUM + 1,
>>>>>>>>                                                     &legacy_domain_ops,
>>>>>>>>                                                     pcie);
>>>>>>>
>>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
>>>>>>> the domain allocation should reflect this. On the other hand, the
>>>>>>> way the driver currently deals with mappings is quite broken
>>>>>>> (consistently adding 1 to
>>>>> the HW interrupt).
>>>>>>>
>>>>>> Hi Marc,
>>>>>>
>>>>>> Without above change I get following crash in kernel while booting.
>>>>>>
>>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>>>>
>>>>>> [    2.441694] ------------[ cut here ]------------
>>>>>>
>>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>>>>
>>>>>> [    2.441702] Modules linked in:
>>>>>>
>>>>>> [    2.441706]
>>>>>>
>>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>>>>
>>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>>>>
>>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>>>>> ffffffc071888000
>>>>>>
>>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>>>>
>>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>>>>
>>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>>>>
>>>>>> if (WARN(hwirq >= domain->hwirq_max,
>>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
>>>> name))
>>>>>>                 return -EINVAL;
>>>>>>
>>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
>>>>>> condition
>>>>> (INTX_NUM + 1) due to which crash is coming.
>>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>>>>
>>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>>>>
>>>>> Your domain should always allocate many interrupts as you have
>>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
>>> 1).
>>>>
>>>> Agreed, but here comes the problem the hwirq for legacy interrupts
>>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
>>>> PCIe specification for legacy interrupts. So these cannot be numbered
>>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
>>>> occurs.
>>>
>>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
>>> 4?
>>>
>> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
>> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
>> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
>> in parameter of struct of_phandle_args.
>> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
>> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
>> to hwirq (*hwirq = fwspec->param[0]).
>> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
>> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
>>
>> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
>> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> 
> Is this resolved yet?  Marc, are you happy, or should we iterate on this
> again?


Ah, sorry to have dropped the ball on this patch.

I guess that given that the infrastructure imposes the hwirq range on
the host drivers, Bharat's approach is the only way (and a number of
other host drivers are already slightly broken). I'll try and have a
look at solving this at the generic level. In the meantime:

Acked-by: Marc Zyngier <marc.zyngier@arm.com>

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
  2016-08-30 10:39 ` Bharat Kumar Gogada
@ 2016-09-13 14:32   ` Bjorn Helgaas
  -1 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 14:32 UTC (permalink / raw)
  To: Bharat Kumar Gogada
  Cc: robh, bhelgaas, colin.king, soren.brinkmann, marc.zyngier,
	michal.simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	rgummal, Bharat Kumar Gogada

Hi Bharat,

On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> The current driver prints pcie core error, for all core events.
> Instead of just printing PCIe core error, now adding prints to show
> individual core events occurred.
> 
> Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> ---
>  drivers/pci/host/pcie-xilinx-nwl.c | 48 +++++++++++++++++++++++++++++++-------
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
> index 3479d30..86c1834 100644
> --- a/drivers/pci/host/pcie-xilinx-nwl.c
> +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> @@ -85,10 +85,15 @@
>  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
>  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
>  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> -
> -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
>  
>  #define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
>  					MSGF_MISC_SR_RXMSG_OVER | \
> @@ -96,9 +101,15 @@
>  					MSGF_MISC_SR_MASTER_ERR | \
>  					MSGF_MISC_SR_I_ADDR_ERR | \
>  					MSGF_MISC_SR_E_ADDR_ERR | \
> +					MSGF_MISC_SR_FATAL_AER | \
> +					MSGF_MISC_SR_NON_FATAL_AER | \
> +					MSGF_MISC_SR_CORR_AER | \
>  					MSGF_MISC_SR_UR_DETECT | \
> -					MSGF_MISC_SR_PCIE_CORE | \
> -					MSGF_MISC_SR_PCIE_CORE_ERR)
> +					MSGF_MISC_SR_NON_FATAL_DEV | \
> +					MSGF_MISC_SR_FATAL_DEV | \
> +					MSGF_MISC_SR_LINK_DOWN | \
> +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
> +					MSGF_MSIC_SR_LINK_BWIDTH)
>  
>  /* Legacy interrupt status mask bits */
>  #define MSGF_LEG_SR_INTA		BIT(0)
> @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
>  		dev_err(pcie->dev,
>  			"In Misc Egress address translation error\n");
>  
> -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> -		dev_err(pcie->dev, "PCIe Core error\n");
> +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> +		dev_err(pcie->dev, "Unsupported request Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> +		dev_err(pcie->dev, "Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> +		dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n");
>  
>  	/* Clear misc interrupt status */
>  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

This patch looks fine, but looking at the code as a whole, I have a
question.  You basically have this:

  misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS) & MSGF_MISC_SR_MASKALL;
  if (!misc_stat)
    return IRQ_NONE;
  ...
  nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

The masking with MSGF_MISC_SR_MASKALL seems wrong.  Let's say
MSGF_MISC_STATUS had some other bit set, e.g., BIT(31), indicating
some yet-unsupported interrupt cause.  BIT(31) is not in
MSGF_MISC_SR_MASKALL, so we return IRQ_NONE without clearing the
interrupt bit in MSGF_MISC_STATUS.  Won't that cause an interrupt
storm where the interrupt is continually re-asserted because we never
clear it?

It seems like it would make more sense to do this:

  misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS);
  if (!misc_stat)
    return IRQ_NONE;
  ...
  if (misc_stat != (misc_stat & MSGF_MISC_SR_MASKALL))
    dev_err(dev, "unexpected IRQ, MSGF_MISC_STATUS %#010x\n", misc_stat);
  nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-09-13 14:32   ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 14:32 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Bharat,

On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> The current driver prints pcie core error, for all core events.
> Instead of just printing PCIe core error, now adding prints to show
> individual core events occurred.
> 
> Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> ---
>  drivers/pci/host/pcie-xilinx-nwl.c | 48 +++++++++++++++++++++++++++++++-------
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
> index 3479d30..86c1834 100644
> --- a/drivers/pci/host/pcie-xilinx-nwl.c
> +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> @@ -85,10 +85,15 @@
>  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
>  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
>  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> -
> -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
>  
>  #define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
>  					MSGF_MISC_SR_RXMSG_OVER | \
> @@ -96,9 +101,15 @@
>  					MSGF_MISC_SR_MASTER_ERR | \
>  					MSGF_MISC_SR_I_ADDR_ERR | \
>  					MSGF_MISC_SR_E_ADDR_ERR | \
> +					MSGF_MISC_SR_FATAL_AER | \
> +					MSGF_MISC_SR_NON_FATAL_AER | \
> +					MSGF_MISC_SR_CORR_AER | \
>  					MSGF_MISC_SR_UR_DETECT | \
> -					MSGF_MISC_SR_PCIE_CORE | \
> -					MSGF_MISC_SR_PCIE_CORE_ERR)
> +					MSGF_MISC_SR_NON_FATAL_DEV | \
> +					MSGF_MISC_SR_FATAL_DEV | \
> +					MSGF_MISC_SR_LINK_DOWN | \
> +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
> +					MSGF_MSIC_SR_LINK_BWIDTH)
>  
>  /* Legacy interrupt status mask bits */
>  #define MSGF_LEG_SR_INTA		BIT(0)
> @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
>  		dev_err(pcie->dev,
>  			"In Misc Egress address translation error\n");
>  
> -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> -		dev_err(pcie->dev, "PCIe Core error\n");
> +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> +		dev_err(pcie->dev, "Unsupported request Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> +		dev_err(pcie->dev, "Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> +		dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n");
>  
>  	/* Clear misc interrupt status */
>  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

This patch looks fine, but looking at the code as a whole, I have a
question.  You basically have this:

  misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS) & MSGF_MISC_SR_MASKALL;
  if (!misc_stat)
    return IRQ_NONE;
  ...
  nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

The masking with MSGF_MISC_SR_MASKALL seems wrong.  Let's say
MSGF_MISC_STATUS had some other bit set, e.g., BIT(31), indicating
some yet-unsupported interrupt cause.  BIT(31) is not in
MSGF_MISC_SR_MASKALL, so we return IRQ_NONE without clearing the
interrupt bit in MSGF_MISC_STATUS.  Won't that cause an interrupt
storm where the interrupt is continually re-asserted because we never
clear it?

It seems like it would make more sense to do this:

  misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS);
  if (!misc_stat)
    return IRQ_NONE;
  ...
  if (misc_stat != (misc_stat & MSGF_MISC_SR_MASKALL))
    dev_err(dev, "unexpected IRQ, MSGF_MISC_STATUS %#010x\n", misc_stat);
  nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-13  7:41                   ` Marc Zyngier
  (?)
@ 2016-09-13 15:05                     ` Bjorn Helgaas
  -1 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:05 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> On 12/09/16 23:02, Bjorn Helgaas wrote:
> > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> >>>>>>> Hi Bharat,
> >>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>>>>>>> nwl_pcie
> >>>>>>> *pcie)
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>>>>>>> -                                                   INTX_NUM,
> >>>>>>>> +                                                   INTX_NUM + 1,
> >>>>>>>>                                                     &legacy_domain_ops,
> >>>>>>>>                                                     pcie);
> >>>>>>>
> >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> >>>>>>> the domain allocation should reflect this. On the other hand, the
> >>>>>>> way the driver currently deals with mappings is quite broken
> >>>>>>> (consistently adding 1 to
> >>>>> the HW interrupt).
> >>>>>>>
> >>>>>> Hi Marc,
> >>>>>>
> >>>>>> Without above change I get following crash in kernel while booting.
> >>>>>>
> >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> >>>>>>
> >>>>>> [    2.441694] ------------[ cut here ]------------
> >>>>>>
> >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >>>>>>
> >>>>>> [    2.441702] Modules linked in:
> >>>>>>
> >>>>>> [    2.441706]
> >>>>>>
> >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >>>>>>
> >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >>>>>>
> >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> >>>>> ffffffc071888000
> >>>>>>
> >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >>>>>>
> >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >>>>>>
> >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> >>>>>>
> >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> >>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> >>>> name))
> >>>>>>                 return -EINVAL;
> >>>>>>
> >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> >>>>>> condition
> >>>>> (INTX_NUM + 1) due to which crash is coming.
> >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> >>>>>
> >>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
> >>>>>
> >>>>> Your domain should always allocate many interrupts as you have
> >>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> >>> 1).
> >>>>
> >>>> Agreed, but here comes the problem the hwirq for legacy interrupts
> >>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> >>>> PCIe specification for legacy interrupts. So these cannot be numbered
> >>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> >>>> occurs.
> >>>
> >>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> >>> 4?
> >>>
> >> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> >> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> >> in parameter of struct of_phandle_args.
> >> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> >> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> >> to hwirq (*hwirq = fwspec->param[0]).
> >> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> >>
> >> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> >> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> > 
> > Is this resolved yet?  Marc, are you happy, or should we iterate on this
> > again?
> 
> Ah, sorry to have dropped the ball on this patch.

No problem, I wasn't making forward progress anyway.

> I guess that given that the infrastructure imposes the hwirq range on
> the host drivers, Bharat's approach is the only way (and a number of
> other host drivers are already slightly broken). I'll try and have a
> look at solving this at the generic level. In the meantime:
> 
> Acked-by: Marc Zyngier <marc.zyngier@arm.com>

After looking at this myself, I'm not happy with this either.  It feels
like there are bugs lurking here and we're just hiding one of them.

Here are the callers of irq_domain_add_linear() for legacy INTx in
drivers/pci/host:

  advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
  dra7xx_pcie_init_irq_domain  4
  ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
  altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
  nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
  xilinx_pcie_init_irq_domain  4

I think all of these use the of_irq_parse_and_map_pci() path you
mentioned, so if the problem is in the way that path works, I would
think these should *all* be requesting the same number of interrupts
in the domain.

I agree with Marc that we should request 4 IRQs, because that's what
we need.  If we can't do that for some reason, we ought to at least
make all these callers the same.

Bjorn

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13 15:05                     ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:05 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> On 12/09/16 23:02, Bjorn Helgaas wrote:
> > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> >>>>>>> Hi Bharat,
> >>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>>>>>>> nwl_pcie
> >>>>>>> *pcie)
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>>>>>>> -                                                   INTX_NUM,
> >>>>>>>> +                                                   INTX_NUM + 1,
> >>>>>>>>                                                     &legacy_domain_ops,
> >>>>>>>>                                                     pcie);
> >>>>>>>
> >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> >>>>>>> the domain allocation should reflect this. On the other hand, the
> >>>>>>> way the driver currently deals with mappings is quite broken
> >>>>>>> (consistently adding 1 to
> >>>>> the HW interrupt).
> >>>>>>>
> >>>>>> Hi Marc,
> >>>>>>
> >>>>>> Without above change I get following crash in kernel while booting.
> >>>>>>
> >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> >>>>>>
> >>>>>> [    2.441694] ------------[ cut here ]------------
> >>>>>>
> >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >>>>>>
> >>>>>> [    2.441702] Modules linked in:
> >>>>>>
> >>>>>> [    2.441706]
> >>>>>>
> >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >>>>>>
> >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >>>>>>
> >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> >>>>> ffffffc071888000
> >>>>>>
> >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >>>>>>
> >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >>>>>>
> >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> >>>>>>
> >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> >>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> >>>> name))
> >>>>>>                 return -EINVAL;
> >>>>>>
> >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> >>>>>> condition
> >>>>> (INTX_NUM + 1) due to which crash is coming.
> >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> >>>>>
> >>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
> >>>>>
> >>>>> Your domain should always allocate many interrupts as you have
> >>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> >>> 1).
> >>>>
> >>>> Agreed, but here comes the problem the hwirq for legacy interrupts
> >>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> >>>> PCIe specification for legacy interrupts. So these cannot be numbered
> >>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> >>>> occurs.
> >>>
> >>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> >>> 4?
> >>>
> >> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> >> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> >> in parameter of struct of_phandle_args.
> >> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> >> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> >> to hwirq (*hwirq = fwspec->param[0]).
> >> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> >>
> >> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> >> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> > 
> > Is this resolved yet?  Marc, are you happy, or should we iterate on this
> > again?
> 
> Ah, sorry to have dropped the ball on this patch.

No problem, I wasn't making forward progress anyway.

> I guess that given that the infrastructure imposes the hwirq range on
> the host drivers, Bharat's approach is the only way (and a number of
> other host drivers are already slightly broken). I'll try and have a
> look at solving this at the generic level. In the meantime:
> 
> Acked-by: Marc Zyngier <marc.zyngier@arm.com>

After looking at this myself, I'm not happy with this either.  It feels
like there are bugs lurking here and we're just hiding one of them.

Here are the callers of irq_domain_add_linear() for legacy INTx in
drivers/pci/host:

  advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
  dra7xx_pcie_init_irq_domain  4
  ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
  altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
  nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
  xilinx_pcie_init_irq_domain  4

I think all of these use the of_irq_parse_and_map_pci() path you
mentioned, so if the problem is in the way that path works, I would
think these should *all* be requesting the same number of interrupts
in the domain.

I agree with Marc that we should request 4 IRQs, because that's what
we need.  If we can't do that for some reason, we ought to at least
make all these callers the same.

Bjorn

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13 15:05                     ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:05 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> On 12/09/16 23:02, Bjorn Helgaas wrote:
> > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> >>>>>>> Hi Bharat,
> >>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> >>>>>>>> nwl_pcie
> >>>>>>> *pcie)
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> >>>>>>>> -                                                   INTX_NUM,
> >>>>>>>> +                                                   INTX_NUM + 1,
> >>>>>>>>                                                     &legacy_domain_ops,
> >>>>>>>>                                                     pcie);
> >>>>>>>
> >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> >>>>>>> the domain allocation should reflect this. On the other hand, the
> >>>>>>> way the driver currently deals with mappings is quite broken
> >>>>>>> (consistently adding 1 to
> >>>>> the HW interrupt).
> >>>>>>>
> >>>>>> Hi Marc,
> >>>>>>
> >>>>>> Without above change I get following crash in kernel while booting.
> >>>>>>
> >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> >>>>>>
> >>>>>> [    2.441694] ------------[ cut here ]------------
> >>>>>>
> >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> >>>>>>
> >>>>>> [    2.441702] Modules linked in:
> >>>>>>
> >>>>>> [    2.441706]
> >>>>>>
> >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> >>>>>>
> >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> >>>>>>
> >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> >>>>> ffffffc071888000
> >>>>>>
> >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> >>>>>>
> >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> >>>>>>
> >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> >>>>>>
> >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> >>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> >>>> name))
> >>>>>>                 return -EINVAL;
> >>>>>>
> >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> >>>>>> condition
> >>>>> (INTX_NUM + 1) due to which crash is coming.
> >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> >>>>>
> >>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
> >>>>>
> >>>>> Your domain should always allocate many interrupts as you have
> >>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> >>> 1).
> >>>>
> >>>> Agreed, but here comes the problem the hwirq for legacy interrupts
> >>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> >>>> PCIe specification for legacy interrupts. So these cannot be numbered
> >>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> >>>> occurs.
> >>>
> >>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> >>> 4?
> >>>
> >> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> >> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> >> in parameter of struct of_phandle_args.
> >> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> >> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> >> to hwirq (*hwirq = fwspec->param[0]).
> >> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> >>
> >> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> >> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> > 
> > Is this resolved yet?  Marc, are you happy, or should we iterate on this
> > again?
> 
> Ah, sorry to have dropped the ball on this patch.

No problem, I wasn't making forward progress anyway.

> I guess that given that the infrastructure imposes the hwirq range on
> the host drivers, Bharat's approach is the only way (and a number of
> other host drivers are already slightly broken). I'll try and have a
> look at solving this at the generic level. In the meantime:
> 
> Acked-by: Marc Zyngier <marc.zyngier@arm.com>

After looking at this myself, I'm not happy with this either.  It feels
like there are bugs lurking here and we're just hiding one of them.

Here are the callers of irq_domain_add_linear() for legacy INTx in
drivers/pci/host:

  advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
  dra7xx_pcie_init_irq_domain  4
  ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
  altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
  nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
  xilinx_pcie_init_irq_domain  4

I think all of these use the of_irq_parse_and_map_pci() path you
mentioned, so if the problem is in the way that path works, I would
think these should *all* be requesting the same number of interrupts
in the domain.

I agree with Marc that we should request 4 IRQs, because that's what
we need.  If we can't do that for some reason, we ought to at least
make all these callers the same.

Bjorn

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
  2016-08-30 10:39 ` Bharat Kumar Gogada
@ 2016-09-13 15:18   ` Bjorn Helgaas
  -1 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:18 UTC (permalink / raw)
  To: Bharat Kumar Gogada
  Cc: robh, bhelgaas, colin.king, soren.brinkmann, marc.zyngier,
	michal.simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	rgummal, Bharat Kumar Gogada

On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> The current driver prints pcie core error, for all core events.
> Instead of just printing PCIe core error, now adding prints to show
> individual core events occurred.
> 
> Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>

I applied the first two patches to pci/host-xilinx for v4.9, thanks!

I'd like to work on the third one a little more, as I mentioned in my
response to it.

> ---
>  drivers/pci/host/pcie-xilinx-nwl.c | 48 +++++++++++++++++++++++++++++++-------
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
> index 3479d30..86c1834 100644
> --- a/drivers/pci/host/pcie-xilinx-nwl.c
> +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> @@ -85,10 +85,15 @@
>  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
>  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
>  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> -
> -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
>  
>  #define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
>  					MSGF_MISC_SR_RXMSG_OVER | \
> @@ -96,9 +101,15 @@
>  					MSGF_MISC_SR_MASTER_ERR | \
>  					MSGF_MISC_SR_I_ADDR_ERR | \
>  					MSGF_MISC_SR_E_ADDR_ERR | \
> +					MSGF_MISC_SR_FATAL_AER | \
> +					MSGF_MISC_SR_NON_FATAL_AER | \
> +					MSGF_MISC_SR_CORR_AER | \
>  					MSGF_MISC_SR_UR_DETECT | \
> -					MSGF_MISC_SR_PCIE_CORE | \
> -					MSGF_MISC_SR_PCIE_CORE_ERR)
> +					MSGF_MISC_SR_NON_FATAL_DEV | \
> +					MSGF_MISC_SR_FATAL_DEV | \
> +					MSGF_MISC_SR_LINK_DOWN | \
> +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
> +					MSGF_MSIC_SR_LINK_BWIDTH)
>  
>  /* Legacy interrupt status mask bits */
>  #define MSGF_LEG_SR_INTA		BIT(0)
> @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
>  		dev_err(pcie->dev,
>  			"In Misc Egress address translation error\n");
>  
> -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> -		dev_err(pcie->dev, "PCIe Core error\n");
> +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> +		dev_err(pcie->dev, "Unsupported request Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> +		dev_err(pcie->dev, "Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> +		dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n");
>  
>  	/* Clear misc interrupt status */
>  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> -- 
> 2.1.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-09-13 15:18   ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> The current driver prints pcie core error, for all core events.
> Instead of just printing PCIe core error, now adding prints to show
> individual core events occurred.
> 
> Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>

I applied the first two patches to pci/host-xilinx for v4.9, thanks!

I'd like to work on the third one a little more, as I mentioned in my
response to it.

> ---
>  drivers/pci/host/pcie-xilinx-nwl.c | 48 +++++++++++++++++++++++++++++++-------
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
> index 3479d30..86c1834 100644
> --- a/drivers/pci/host/pcie-xilinx-nwl.c
> +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> @@ -85,10 +85,15 @@
>  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
>  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
>  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> -
> -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
>  
>  #define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
>  					MSGF_MISC_SR_RXMSG_OVER | \
> @@ -96,9 +101,15 @@
>  					MSGF_MISC_SR_MASTER_ERR | \
>  					MSGF_MISC_SR_I_ADDR_ERR | \
>  					MSGF_MISC_SR_E_ADDR_ERR | \
> +					MSGF_MISC_SR_FATAL_AER | \
> +					MSGF_MISC_SR_NON_FATAL_AER | \
> +					MSGF_MISC_SR_CORR_AER | \
>  					MSGF_MISC_SR_UR_DETECT | \
> -					MSGF_MISC_SR_PCIE_CORE | \
> -					MSGF_MISC_SR_PCIE_CORE_ERR)
> +					MSGF_MISC_SR_NON_FATAL_DEV | \
> +					MSGF_MISC_SR_FATAL_DEV | \
> +					MSGF_MISC_SR_LINK_DOWN | \
> +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
> +					MSGF_MSIC_SR_LINK_BWIDTH)
>  
>  /* Legacy interrupt status mask bits */
>  #define MSGF_LEG_SR_INTA		BIT(0)
> @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
>  		dev_err(pcie->dev,
>  			"In Misc Egress address translation error\n");
>  
> -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> -		dev_err(pcie->dev, "PCIe Core error\n");
> +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> +		dev_err(pcie->dev, "Unsupported request Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> +		dev_err(pcie->dev, "Fatal Error Detected\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n");
> +
> +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> +		dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n");
>  
>  	/* Clear misc interrupt status */
>  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> -- 
> 2.1.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-13 15:05                     ` Bjorn Helgaas
  (?)
@ 2016-09-13 15:34                       ` Bjorn Helgaas
  -1 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:34 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri, Ley Foon Tan, Thomas Petazzoni,
	Kishon Vijay Abraham I, Murali Karicheri

[+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]

On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>>>>> Hi Bharat,
> > >>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> > >>>>>>>> nwl_pcie
> > >>>>>>> *pcie)
> > >>>>>>>>     }
> > >>>>>>>>
> > >>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > >>>>>>>> -                                                   INTX_NUM,
> > >>>>>>>> +                                                   INTX_NUM + 1,
> > >>>>>>>>                                                     &legacy_domain_ops,
> > >>>>>>>>                                                     pcie);
> > >>>>>>>
> > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> > >>>>>>> the domain allocation should reflect this. On the other hand, the
> > >>>>>>> way the driver currently deals with mappings is quite broken
> > >>>>>>> (consistently adding 1 to
> > >>>>> the HW interrupt).
> > >>>>>>>
> > >>>>>> Hi Marc,
> > >>>>>>
> > >>>>>> Without above change I get following crash in kernel while booting.
> > >>>>>>
> > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>>>>
> > >>>>>> [    2.441694] ------------[ cut here ]------------
> > >>>>>>
> > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>>>>
> > >>>>>> [    2.441702] Modules linked in:
> > >>>>>>
> > >>>>>> [    2.441706]
> > >>>>>>
> > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>>>>
> > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>>>>
> > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >>>>> ffffffc071888000
> > >>>>>>
> > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>>>>
> > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> > >>>> name))
> > >>>>>>                 return -EINVAL;
> > >>>>>>
> > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>>>>> condition
> > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>>>>
> > >>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
> > >>>>>
> > >>>>> Your domain should always allocate many interrupts as you have
> > >>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> > >>> 1).
> > >>>>
> > >>>> Agreed, but here comes the problem the hwirq for legacy interrupts
> > >>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > >>>> PCIe specification for legacy interrupts. So these cannot be numbered
> > >>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > >>>> occurs.
> > >>>
> > >>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> > >>> 4?
> > >>>
> > >> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> > >> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> > >> in parameter of struct of_phandle_args.
> > >> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> > >> to hwirq (*hwirq = fwspec->param[0]).
> > >> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> > >>
> > >> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> > >> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> > > 
> > > Is this resolved yet?  Marc, are you happy, or should we iterate on this
> > > again?
> > 
> > Ah, sorry to have dropped the ball on this patch.
> 
> No problem, I wasn't making forward progress anyway.
> 
> > I guess that given that the infrastructure imposes the hwirq range on
> > the host drivers, Bharat's approach is the only way (and a number of
> > other host drivers are already slightly broken). I'll try and have a
> > look at solving this at the generic level. In the meantime:
> > 
> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> 
> After looking at this myself, I'm not happy with this either.  It feels
> like there are bugs lurking here and we're just hiding one of them.
> 
> Here are the callers of irq_domain_add_linear() for legacy INTx in
> drivers/pci/host:
> 
>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>   dra7xx_pcie_init_irq_domain  4
>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>   xilinx_pcie_init_irq_domain  4

The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
error when INTx is 4").  I should have noticed this inconsistency back
then.

Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
only request 4 IRQs and only INTA, INTB, and INTC work?

> I think all of these use the of_irq_parse_and_map_pci() path you
> mentioned, so if the problem is in the way that path works, I would
> think these should *all* be requesting the same number of interrupts
> in the domain.
> 
> I agree with Marc that we should request 4 IRQs, because that's what
> we need.  If we can't do that for some reason, we ought to at least
> make all these callers the same.
> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13 15:34                       ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:34 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri, Ley Foon Tan, Thomas Petazzoni,
	Kishon Vijay Abraham I, Murali Karicheri

[+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]

On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>>>>> Hi Bharat,
> > >>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> > >>>>>>>> nwl_pcie
> > >>>>>>> *pcie)
> > >>>>>>>>     }
> > >>>>>>>>
> > >>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > >>>>>>>> -                                                   INTX_NUM,
> > >>>>>>>> +                                                   INTX_NUM + 1,
> > >>>>>>>>                                                     &legacy_domain_ops,
> > >>>>>>>>                                                     pcie);
> > >>>>>>>
> > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> > >>>>>>> the domain allocation should reflect this. On the other hand, the
> > >>>>>>> way the driver currently deals with mappings is quite broken
> > >>>>>>> (consistently adding 1 to
> > >>>>> the HW interrupt).
> > >>>>>>>
> > >>>>>> Hi Marc,
> > >>>>>>
> > >>>>>> Without above change I get following crash in kernel while booting.
> > >>>>>>
> > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>>>>
> > >>>>>> [    2.441694] ------------[ cut here ]------------
> > >>>>>>
> > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>>>>
> > >>>>>> [    2.441702] Modules linked in:
> > >>>>>>
> > >>>>>> [    2.441706]
> > >>>>>>
> > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>>>>
> > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>>>>
> > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >>>>> ffffffc071888000
> > >>>>>>
> > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>>>>
> > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> > >>>> name))
> > >>>>>>                 return -EINVAL;
> > >>>>>>
> > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>>>>> condition
> > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>>>>
> > >>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
> > >>>>>
> > >>>>> Your domain should always allocate many interrupts as you have
> > >>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> > >>> 1).
> > >>>>
> > >>>> Agreed, but here comes the problem the hwirq for legacy interrupts
> > >>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > >>>> PCIe specification for legacy interrupts. So these cannot be numbered
> > >>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > >>>> occurs.
> > >>>
> > >>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> > >>> 4?
> > >>>
> > >> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> > >> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> > >> in parameter of struct of_phandle_args.
> > >> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> > >> to hwirq (*hwirq = fwspec->param[0]).
> > >> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> > >>
> > >> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> > >> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> > > 
> > > Is this resolved yet?  Marc, are you happy, or should we iterate on this
> > > again?
> > 
> > Ah, sorry to have dropped the ball on this patch.
> 
> No problem, I wasn't making forward progress anyway.
> 
> > I guess that given that the infrastructure imposes the hwirq range on
> > the host drivers, Bharat's approach is the only way (and a number of
> > other host drivers are already slightly broken). I'll try and have a
> > look at solving this at the generic level. In the meantime:
> > 
> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> 
> After looking at this myself, I'm not happy with this either.  It feels
> like there are bugs lurking here and we're just hiding one of them.
> 
> Here are the callers of irq_domain_add_linear() for legacy INTx in
> drivers/pci/host:
> 
>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>   dra7xx_pcie_init_irq_domain  4
>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>   xilinx_pcie_init_irq_domain  4

The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
error when INTx is 4").  I should have noticed this inconsistency back
then.

Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
only request 4 IRQs and only INTA, INTB, and INTC work?

> I think all of these use the of_irq_parse_and_map_pci() path you
> mentioned, so if the problem is in the way that path works, I would
> think these should *all* be requesting the same number of interrupts
> in the domain.
> 
> I agree with Marc that we should request 4 IRQs, because that's what
> we need.  If we can't do that for some reason, we ought to at least
> make all these callers the same.
> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13 15:34                       ` Bjorn Helgaas
  0 siblings, 0 replies; 58+ messages in thread
From: Bjorn Helgaas @ 2016-09-13 15:34 UTC (permalink / raw)
  To: linux-arm-kernel

[+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]

On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>>>>> Hi Bharat,
> > >>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
> > >>>>>>>> nwl_pcie
> > >>>>>>> *pcie)
> > >>>>>>>>     }
> > >>>>>>>>
> > >>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
> > >>>>>>>> -                                                   INTX_NUM,
> > >>>>>>>> +                                                   INTX_NUM + 1,
> > >>>>>>>>                                                     &legacy_domain_ops,
> > >>>>>>>>                                                     pcie);
> > >>>>>>>
> > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
> > >>>>>>> the domain allocation should reflect this. On the other hand, the
> > >>>>>>> way the driver currently deals with mappings is quite broken
> > >>>>>>> (consistently adding 1 to
> > >>>>> the HW interrupt).
> > >>>>>>>
> > >>>>>> Hi Marc,
> > >>>>>>
> > >>>>>> Without above change I get following crash in kernel while booting.
> > >>>>>>
> > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>>>>
> > >>>>>> [    2.441694] ------------[ cut here ]------------
> > >>>>>>
> > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>>>>
> > >>>>>> [    2.441702] Modules linked in:
> > >>>>>>
> > >>>>>> [    2.441706]
> > >>>>>>
> > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>>>>
> > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>>>>
> > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >>>>> ffffffc071888000
> > >>>>>>
> > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>>>>
> > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
> > >>>> name))
> > >>>>>>                 return -EINVAL;
> > >>>>>>
> > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>>>>> condition
> > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>>>>
> > >>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
> > >>>>>
> > >>>>> Your domain should always allocate many interrupts as you have
> > >>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
> > >>> 1).
> > >>>>
> > >>>> Agreed, but here comes the problem the hwirq for legacy interrupts
> > >>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
> > >>>> PCIe specification for legacy interrupts. So these cannot be numbered
> > >>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
> > >>>> occurs.
> > >>>
> > >>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
> > >>> 4?
> > >>>
> > >> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
> > >> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
> > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
> > >> in parameter of struct of_phandle_args.
> > >> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
> > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
> > >> to hwirq (*hwirq = fwspec->param[0]).
> > >> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
> > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
> > >>
> > >> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
> > >> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
> > > 
> > > Is this resolved yet?  Marc, are you happy, or should we iterate on this
> > > again?
> > 
> > Ah, sorry to have dropped the ball on this patch.
> 
> No problem, I wasn't making forward progress anyway.
> 
> > I guess that given that the infrastructure imposes the hwirq range on
> > the host drivers, Bharat's approach is the only way (and a number of
> > other host drivers are already slightly broken). I'll try and have a
> > look at solving this at the generic level. In the meantime:
> > 
> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> 
> After looking at this myself, I'm not happy with this either.  It feels
> like there are bugs lurking here and we're just hiding one of them.
> 
> Here are the callers of irq_domain_add_linear() for legacy INTx in
> drivers/pci/host:
> 
>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>   dra7xx_pcie_init_irq_domain  4
>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>   xilinx_pcie_init_irq_domain  4

The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
error when INTx is 4").  I should have noticed this inconsistency back
then.

Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
only request 4 IRQs and only INTA, INTB, and INTC work?

> I think all of these use the of_irq_parse_and_map_pci() path you
> mentioned, so if the problem is in the way that path works, I would
> think these should *all* be requesting the same number of interrupts
> in the domain.
> 
> I agree with Marc that we should request 4 IRQs, because that's what
> we need.  If we can't do that for some reason, we ought to at least
> make all these callers the same.
> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-13 15:34                       ` Bjorn Helgaas
  (?)
@ 2016-09-13 15:50                         ` Thomas Petazzoni
  -1 siblings, 0 replies; 58+ messages in thread
From: Thomas Petazzoni @ 2016-09-13 15:50 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Marc Zyngier, Bharat Kumar Gogada, robh, bhelgaas, colin.king,
	Soren Brinkmann, Michal Simek, arnd, linux-arm-kernel, linux-pci,
	linux-kernel, Ravikiran Gummaluri, Ley Foon Tan,
	Kishon Vijay Abraham I, Murali Karicheri

Hello,

On Tue, 13 Sep 2016 10:34:02 -0500, Bjorn Helgaas wrote:

> > After looking at this myself, I'm not happy with this either.  It feels
> > like there are bugs lurking here and we're just hiding one of them.
> > 
> > Here are the callers of irq_domain_add_linear() for legacy INTx in
> > drivers/pci/host:
> > 
> >   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
> >   dra7xx_pcie_init_irq_domain  4
> >   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
> >   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
> >   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
> >   xilinx_pcie_init_irq_domain  4  
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back
> then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
> only request 4 IRQs and only INTA, INTB, and INTC work?
> 
> > I think all of these use the of_irq_parse_and_map_pci() path you
> > mentioned, so if the problem is in the way that path works, I would
> > think these should *all* be requesting the same number of interrupts
> > in the domain.
> > 
> > I agree with Marc that we should request 4 IRQs, because that's what
> > we need.  If we can't do that for some reason, we ought to at least
> > make all these callers the same.

Thanks for Cc'ing about this issue. Indeed, the Aardvark driver
supports all of INT{A,B,C,D}, so the current situation doesn't work. As
suggested, the simplest solution is to just allocate an irq domain with
5 IRQs, like is done in the Altera driver.

However, my feeling is that a more correct solution would be to have a
translation between the PCI_INTERRUPT_PIN value (in the range 0x1 to
0x4) to the hwirq value (in the range 0x0 to 0x3).

Best regards,

Thomas
-- 
Thomas Petazzoni, CTO, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13 15:50                         ` Thomas Petazzoni
  0 siblings, 0 replies; 58+ messages in thread
From: Thomas Petazzoni @ 2016-09-13 15:50 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Marc Zyngier, Bharat Kumar Gogada, robh, bhelgaas, colin.king,
	Soren Brinkmann, Michal Simek, arnd, linux-arm-kernel, linux-pci,
	linux-kernel, Ravikiran Gummaluri, Ley Foon Tan,
	Kishon Vijay Abraham I, Murali Karicheri

Hello,

On Tue, 13 Sep 2016 10:34:02 -0500, Bjorn Helgaas wrote:

> > After looking at this myself, I'm not happy with this either.  It feels
> > like there are bugs lurking here and we're just hiding one of them.
> > 
> > Here are the callers of irq_domain_add_linear() for legacy INTx in
> > drivers/pci/host:
> > 
> >   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
> >   dra7xx_pcie_init_irq_domain  4
> >   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
> >   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
> >   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
> >   xilinx_pcie_init_irq_domain  4  
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back
> then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
> only request 4 IRQs and only INTA, INTB, and INTC work?
> 
> > I think all of these use the of_irq_parse_and_map_pci() path you
> > mentioned, so if the problem is in the way that path works, I would
> > think these should *all* be requesting the same number of interrupts
> > in the domain.
> > 
> > I agree with Marc that we should request 4 IRQs, because that's what
> > we need.  If we can't do that for some reason, we ought to at least
> > make all these callers the same.

Thanks for Cc'ing about this issue. Indeed, the Aardvark driver
supports all of INT{A,B,C,D}, so the current situation doesn't work. As
suggested, the simplest solution is to just allocate an irq domain with
5 IRQs, like is done in the Altera driver.

However, my feeling is that a more correct solution would be to have a
translation between the PCI_INTERRUPT_PIN value (in the range 0x1 to
0x4) to the hwirq value (in the range 0x0 to 0x3).

Best regards,

Thomas
-- 
Thomas Petazzoni, CTO, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-13 15:50                         ` Thomas Petazzoni
  0 siblings, 0 replies; 58+ messages in thread
From: Thomas Petazzoni @ 2016-09-13 15:50 UTC (permalink / raw)
  To: linux-arm-kernel

Hello,

On Tue, 13 Sep 2016 10:34:02 -0500, Bjorn Helgaas wrote:

> > After looking at this myself, I'm not happy with this either.  It feels
> > like there are bugs lurking here and we're just hiding one of them.
> > 
> > Here are the callers of irq_domain_add_linear() for legacy INTx in
> > drivers/pci/host:
> > 
> >   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
> >   dra7xx_pcie_init_irq_domain  4
> >   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
> >   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
> >   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
> >   xilinx_pcie_init_irq_domain  4  
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back
> then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
> only request 4 IRQs and only INTA, INTB, and INTC work?
> 
> > I think all of these use the of_irq_parse_and_map_pci() path you
> > mentioned, so if the problem is in the way that path works, I would
> > think these should *all* be requesting the same number of interrupts
> > in the domain.
> > 
> > I agree with Marc that we should request 4 IRQs, because that's what
> > we need.  If we can't do that for some reason, we ought to at least
> > make all these callers the same.

Thanks for Cc'ing about this issue. Indeed, the Aardvark driver
supports all of INT{A,B,C,D}, so the current situation doesn't work. As
suggested, the simplest solution is to just allocate an irq domain with
5 IRQs, like is done in the Altera driver.

However, my feeling is that a more correct solution would be to have a
translation between the PCI_INTERRUPT_PIN value (in the range 0x1 to
0x4) to the hwirq value (in the range 0x0 to 0x3).

Best regards,

Thomas
-- 
Thomas Petazzoni, CTO, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
  2016-09-13 14:32   ` Bjorn Helgaas
  (?)
@ 2016-09-14  5:26     ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:26 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, marc.zyngier,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

> Hi Bharat,
> 
> On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> > The current driver prints pcie core error, for all core events.
> > Instead of just printing PCIe core error, now adding prints to show
> > individual core events occurred.
> >
> > Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> > ---
> >  drivers/pci/host/pcie-xilinx-nwl.c | 48
> > +++++++++++++++++++++++++++++++-------
> >  1 file changed, 40 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/pci/host/pcie-xilinx-nwl.c
> > b/drivers/pci/host/pcie-xilinx-nwl.c
> > index 3479d30..86c1834 100644
> > --- a/drivers/pci/host/pcie-xilinx-nwl.c
> > +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> > @@ -85,10 +85,15 @@
> >  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
> >  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
> >  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> > -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> > -
> > -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> > -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> > +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> > +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> > +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> > +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> > +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> > +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> > +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> > +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> > +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
> >
> >  #define MSGF_MISC_SR_MASKALL
> 	(MSGF_MISC_SR_RXMSG_AVAIL | \
> >  					MSGF_MISC_SR_RXMSG_OVER | \
> > @@ -96,9 +101,15 @@
> >  					MSGF_MISC_SR_MASTER_ERR | \
> >  					MSGF_MISC_SR_I_ADDR_ERR | \
> >  					MSGF_MISC_SR_E_ADDR_ERR | \
> > +					MSGF_MISC_SR_FATAL_AER | \
> > +					MSGF_MISC_SR_NON_FATAL_AER | \
> > +					MSGF_MISC_SR_CORR_AER | \
> >  					MSGF_MISC_SR_UR_DETECT | \
> > -					MSGF_MISC_SR_PCIE_CORE | \
> > -					MSGF_MISC_SR_PCIE_CORE_ERR)
> > +					MSGF_MISC_SR_NON_FATAL_DEV | \
> > +					MSGF_MISC_SR_FATAL_DEV | \
> > +					MSGF_MISC_SR_LINK_DOWN | \
> > +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH
> | \
> > +					MSGF_MSIC_SR_LINK_BWIDTH)
> >
> >  /* Legacy interrupt status mask bits */
> >  #define MSGF_LEG_SR_INTA		BIT(0)
> > @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void
> *data)
> >  		dev_err(pcie->dev,
> >  			"In Misc Egress address translation error\n");
> >
> > -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> > -		dev_err(pcie->dev, "PCIe Core error\n");
> > +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> > +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> > +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> > +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> > +		dev_err(pcie->dev, "Unsupported request Detected\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> > +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> > +		dev_err(pcie->dev, "Fatal Error Detected\n");
> > +
> > +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> > +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management
> Status
> > +bit set\n");
> > +
> > +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> > +		dev_info(pcie->dev, "Link Bandwidth Management Status bit
> set\n");
> >
> >  	/* Clear misc interrupt status */
> >  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> 
> This patch looks fine, but looking at the code as a whole, I have a question.  You
> basically have this:
> 
>   misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS) &
> MSGF_MISC_SR_MASKALL;
>   if (!misc_stat)
>     return IRQ_NONE;
>   ...
>   nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> 
> The masking with MSGF_MISC_SR_MASKALL seems wrong.  Let's say
> MSGF_MISC_STATUS had some other bit set, e.g., BIT(31), indicating some yet-
> unsupported interrupt cause.  BIT(31) is not in MSGF_MISC_SR_MASKALL, so we
> return IRQ_NONE without clearing the interrupt bit in MSGF_MISC_STATUS.
> Won't that cause an interrupt storm where the interrupt is continually re-
> asserted because we never clear it?
> 
> It seems like it would make more sense to do this:
> 
>   misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS);
>   if (!misc_stat)
>     return IRQ_NONE;
>   ...
>   if (misc_stat != (misc_stat & MSGF_MISC_SR_MASKALL))
>     dev_err(dev, "unexpected IRQ, MSGF_MISC_STATUS %#010x\n", misc_stat);
>   nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

Hi Bjorn,

Thanks for the suggestion. The above mentioned bits are reserved and they will never go high. 
We haven't seen such kind of issue until now. 
So I feel the current implementation is fine.

Thanks & Regards,
Bharat

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-09-14  5:26     ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:26 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: arnd, marc.zyngier, linux-pci, Ravikiran Gummaluri, linux-kernel,
	Michal Simek, Soren Brinkmann, bhelgaas, colin.king,
	linux-arm-kernel

> Hi Bharat,
> 
> On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> > The current driver prints pcie core error, for all core events.
> > Instead of just printing PCIe core error, now adding prints to show
> > individual core events occurred.
> >
> > Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> > ---
> >  drivers/pci/host/pcie-xilinx-nwl.c | 48
> > +++++++++++++++++++++++++++++++-------
> >  1 file changed, 40 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/pci/host/pcie-xilinx-nwl.c
> > b/drivers/pci/host/pcie-xilinx-nwl.c
> > index 3479d30..86c1834 100644
> > --- a/drivers/pci/host/pcie-xilinx-nwl.c
> > +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> > @@ -85,10 +85,15 @@
> >  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
> >  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
> >  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> > -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> > -
> > -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> > -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> > +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> > +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> > +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> > +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> > +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> > +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> > +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> > +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> > +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
> >
> >  #define MSGF_MISC_SR_MASKALL
> 	(MSGF_MISC_SR_RXMSG_AVAIL | \
> >  					MSGF_MISC_SR_RXMSG_OVER | \
> > @@ -96,9 +101,15 @@
> >  					MSGF_MISC_SR_MASTER_ERR | \
> >  					MSGF_MISC_SR_I_ADDR_ERR | \
> >  					MSGF_MISC_SR_E_ADDR_ERR | \
> > +					MSGF_MISC_SR_FATAL_AER | \
> > +					MSGF_MISC_SR_NON_FATAL_AER | \
> > +					MSGF_MISC_SR_CORR_AER | \
> >  					MSGF_MISC_SR_UR_DETECT | \
> > -					MSGF_MISC_SR_PCIE_CORE | \
> > -					MSGF_MISC_SR_PCIE_CORE_ERR)
> > +					MSGF_MISC_SR_NON_FATAL_DEV | \
> > +					MSGF_MISC_SR_FATAL_DEV | \
> > +					MSGF_MISC_SR_LINK_DOWN | \
> > +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH
> | \
> > +					MSGF_MSIC_SR_LINK_BWIDTH)
> >
> >  /* Legacy interrupt status mask bits */
> >  #define MSGF_LEG_SR_INTA		BIT(0)
> > @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void
> *data)
> >  		dev_err(pcie->dev,
> >  			"In Misc Egress address translation error\n");
> >
> > -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> > -		dev_err(pcie->dev, "PCIe Core error\n");
> > +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> > +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> > +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> > +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> > +		dev_err(pcie->dev, "Unsupported request Detected\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> > +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> > +		dev_err(pcie->dev, "Fatal Error Detected\n");
> > +
> > +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> > +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management
> Status
> > +bit set\n");
> > +
> > +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> > +		dev_info(pcie->dev, "Link Bandwidth Management Status bit
> set\n");
> >
> >  	/* Clear misc interrupt status */
> >  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> 
> This patch looks fine, but looking at the code as a whole, I have a question.  You
> basically have this:
> 
>   misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS) &
> MSGF_MISC_SR_MASKALL;
>   if (!misc_stat)
>     return IRQ_NONE;
>   ...
>   nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> 
> The masking with MSGF_MISC_SR_MASKALL seems wrong.  Let's say
> MSGF_MISC_STATUS had some other bit set, e.g., BIT(31), indicating some yet-
> unsupported interrupt cause.  BIT(31) is not in MSGF_MISC_SR_MASKALL, so we
> return IRQ_NONE without clearing the interrupt bit in MSGF_MISC_STATUS.
> Won't that cause an interrupt storm where the interrupt is continually re-
> asserted because we never clear it?
> 
> It seems like it would make more sense to do this:
> 
>   misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS);
>   if (!misc_stat)
>     return IRQ_NONE;
>   ...
>   if (misc_stat != (misc_stat & MSGF_MISC_SR_MASKALL))
>     dev_err(dev, "unexpected IRQ, MSGF_MISC_STATUS %#010x\n", misc_stat);
>   nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

Hi Bjorn,

Thanks for the suggestion. The above mentioned bits are reserved and they will never go high. 
We haven't seen such kind of issue until now. 
So I feel the current implementation is fine.

Thanks & Regards,
Bharat

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-09-14  5:26     ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:26 UTC (permalink / raw)
  To: linux-arm-kernel

> Hi Bharat,
> 
> On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> > The current driver prints pcie core error, for all core events.
> > Instead of just printing PCIe core error, now adding prints to show
> > individual core events occurred.
> >
> > Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> > ---
> >  drivers/pci/host/pcie-xilinx-nwl.c | 48
> > +++++++++++++++++++++++++++++++-------
> >  1 file changed, 40 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/pci/host/pcie-xilinx-nwl.c
> > b/drivers/pci/host/pcie-xilinx-nwl.c
> > index 3479d30..86c1834 100644
> > --- a/drivers/pci/host/pcie-xilinx-nwl.c
> > +++ b/drivers/pci/host/pcie-xilinx-nwl.c
> > @@ -85,10 +85,15 @@
> >  #define MSGF_MISC_SR_MASTER_ERR		BIT(5)
> >  #define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
> >  #define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
> > -#define MSGF_MISC_SR_UR_DETECT          BIT(20)
> > -
> > -#define MSGF_MISC_SR_PCIE_CORE		GENMASK(18, 16)
> > -#define MSGF_MISC_SR_PCIE_CORE_ERR	GENMASK(31, 22)
> > +#define MSGF_MISC_SR_FATAL_AER		BIT(16)
> > +#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
> > +#define MSGF_MISC_SR_CORR_AER		BIT(18)
> > +#define MSGF_MISC_SR_UR_DETECT		BIT(20)
> > +#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
> > +#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
> > +#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
> > +#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
> > +#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
> >
> >  #define MSGF_MISC_SR_MASKALL
> 	(MSGF_MISC_SR_RXMSG_AVAIL | \
> >  					MSGF_MISC_SR_RXMSG_OVER | \
> > @@ -96,9 +101,15 @@
> >  					MSGF_MISC_SR_MASTER_ERR | \
> >  					MSGF_MISC_SR_I_ADDR_ERR | \
> >  					MSGF_MISC_SR_E_ADDR_ERR | \
> > +					MSGF_MISC_SR_FATAL_AER | \
> > +					MSGF_MISC_SR_NON_FATAL_AER | \
> > +					MSGF_MISC_SR_CORR_AER | \
> >  					MSGF_MISC_SR_UR_DETECT | \
> > -					MSGF_MISC_SR_PCIE_CORE | \
> > -					MSGF_MISC_SR_PCIE_CORE_ERR)
> > +					MSGF_MISC_SR_NON_FATAL_DEV | \
> > +					MSGF_MISC_SR_FATAL_DEV | \
> > +					MSGF_MISC_SR_LINK_DOWN | \
> > +					MSGF_MSIC_SR_LINK_AUTO_BWIDTH
> | \
> > +					MSGF_MSIC_SR_LINK_BWIDTH)
> >
> >  /* Legacy interrupt status mask bits */
> >  #define MSGF_LEG_SR_INTA		BIT(0)
> > @@ -291,8 +302,29 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void
> *data)
> >  		dev_err(pcie->dev,
> >  			"In Misc Egress address translation error\n");
> >
> > -	if (misc_stat & MSGF_MISC_SR_PCIE_CORE_ERR)
> > -		dev_err(pcie->dev, "PCIe Core error\n");
> > +	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
> > +		dev_err(pcie->dev, "Fatal Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
> > +		dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_CORR_AER)
> > +		dev_err(pcie->dev, "Correctable Error in AER Capability\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
> > +		dev_err(pcie->dev, "Unsupported request Detected\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
> > +		dev_err(pcie->dev, "Non-Fatal Error Detected\n");
> > +
> > +	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
> > +		dev_err(pcie->dev, "Fatal Error Detected\n");
> > +
> > +	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
> > +		dev_info(pcie->dev, "Link Autonomous Bandwidth Management
> Status
> > +bit set\n");
> > +
> > +	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
> > +		dev_info(pcie->dev, "Link Bandwidth Management Status bit
> set\n");
> >
> >  	/* Clear misc interrupt status */
> >  	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> 
> This patch looks fine, but looking at the code as a whole, I have a question.  You
> basically have this:
> 
>   misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS) &
> MSGF_MISC_SR_MASKALL;
>   if (!misc_stat)
>     return IRQ_NONE;
>   ...
>   nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
> 
> The masking with MSGF_MISC_SR_MASKALL seems wrong.  Let's say
> MSGF_MISC_STATUS had some other bit set, e.g., BIT(31), indicating some yet-
> unsupported interrupt cause.  BIT(31) is not in MSGF_MISC_SR_MASKALL, so we
> return IRQ_NONE without clearing the interrupt bit in MSGF_MISC_STATUS.
> Won't that cause an interrupt storm where the interrupt is continually re-
> asserted because we never clear it?
> 
> It seems like it would make more sense to do this:
> 
>   misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS);
>   if (!misc_stat)
>     return IRQ_NONE;
>   ...
>   if (misc_stat != (misc_stat & MSGF_MISC_SR_MASKALL))
>     dev_err(dev, "unexpected IRQ, MSGF_MISC_STATUS %#010x\n", misc_stat);
>   nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);

Hi Bjorn,

Thanks for the suggestion. The above mentioned bits are reserved and they will never go high. 
We haven't seen such kind of issue until now. 
So I feel the current implementation is fine.

Thanks & Regards,
Bharat

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
  2016-09-13 15:18   ` Bjorn Helgaas
  (?)
@ 2016-09-14  5:28     ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:28 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, marc.zyngier,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

> On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> > The current driver prints pcie core error, for all core events.
> > Instead of just printing PCIe core error, now adding prints to show
> > individual core events occurred.
> >
> > Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> 
> I applied the first two patches to pci/host-xilinx for v4.9, thanks!
> 
> I'd like to work on the third one a little more, as I mentioned in my response to
> it.
> 
Thanks Bjorn, yes I agree that third one needs better way of handling.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-09-14  5:28     ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:28 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, marc.zyngier,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri

> On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> > The current driver prints pcie core error, for all core events.
> > Instead of just printing PCIe core error, now adding prints to show
> > individual core events occurred.
> >
> > Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
>=20
> I applied the first two patches to pci/host-xilinx for v4.9, thanks!
>=20
> I'd like to work on the third one a little more, as I mentioned in my res=
ponse to
> it.
>=20
Thanks Bjorn, yes I agree that third one needs better way of handling.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred.
@ 2016-09-14  5:28     ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:28 UTC (permalink / raw)
  To: linux-arm-kernel

> On Tue, Aug 30, 2016 at 04:09:16PM +0530, Bharat Kumar Gogada wrote:
> > The current driver prints pcie core error, for all core events.
> > Instead of just printing PCIe core error, now adding prints to show
> > individual core events occurred.
> >
> > Signed-off-by: Bharat Kumar Gogada <bharatku@xilinx.com>
> 
> I applied the first two patches to pci/host-xilinx for v4.9, thanks!
> 
> I'd like to work on the third one a little more, as I mentioned in my response to
> it.
> 
Thanks Bjorn, yes I agree that third one needs better way of handling.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-13 15:34                       ` Bjorn Helgaas
  (?)
@ 2016-09-14  5:34                         ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:34 UTC (permalink / raw)
  To: Bjorn Helgaas, Marc Zyngier
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, Michal Simek, arnd,
	linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri,
	Ley Foon Tan, Thomas Petazzoni, Kishon Vijay Abraham I,
	Murali Karicheri

> [+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]
> 
> On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
> > On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > > >>>>>>> Hi Bharat,
> > > >>>>>>>> @@ -561,7 +561,7 @@ static int
> > > >>>>>>>> nwl_pcie_init_irq_domain(struct nwl_pcie
> > > >>>>>>> *pcie)
> > > >>>>>>>>     }
> > > >>>>>>>>
> > > >>>>>>>>     pcie->legacy_irq_domain =
> irq_domain_add_linear(legacy_intc_node,
> > > >>>>>>>> -                                                   INTX_NUM,
> > > >>>>>>>> +
> > > >>>>>>>> + INTX_NUM + 1,
> > > >>>>>>>>                                                     &legacy_domain_ops,
> > > >>>>>>>>                                                     pcie);
> > > >>>>>>>
> > > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM
> > > >>>>>>> irqs, so the domain allocation should reflect this. On the
> > > >>>>>>> other hand, the way the driver currently deals with mappings
> > > >>>>>>> is quite broken (consistently adding 1 to
> > > >>>>> the HW interrupt).
> > > >>>>>>>
> > > >>>>>> Hi Marc,
> > > >>>>>>
> > > >>>>>> Without above change I get following crash in kernel while booting.
> > > >>>>>>
> > > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > > >>>>>>
> > > >>>>>> [    2.441694] ------------[ cut here ]------------
> > > >>>>>>
> > > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > > >>>>>>
> > > >>>>>> [    2.441702] Modules linked in:
> > > >>>>>>
> > > >>>>>> [    2.441706]
> > > >>>>>>
> > > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > > >>>>>>
> > > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > > >>>>>>
> > > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > > >>>>> ffffffc071888000
> > > >>>>>>
> > > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > > >>>>>>
> > > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > > >>>>>>
> > > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > > >>>>>>
> > > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > > >>>>>>                  "error: hwirq 0x%x is too large for %s\n",
> > > >>>>>> (int)hwirq, domain-
> > > >>>> name))
> > > >>>>>>                 return -EINVAL;
> > > >>>>>>
> > > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > > >>>>>> condition
> > > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > > >>>>>
> > > >>>>> I understood that. I'm still persisting in saying that you have the wrong
> fix.
> > > >>>>>
> > > >>>>> Your domain should always allocate many interrupts as you have
> > > >>>>> interrupt sources. These interrupts (hwirq) should be numbered
> > > >>>>> from 0 to (n-
> > > >>> 1).
> > > >>>>
> > > >>>> Agreed, but here comes the problem the hwirq for legacy
> > > >>>> interrupts will start at 0x1 to 0x4 (INTA to INTD) and these
> > > >>>> values are as per PCIe specification for legacy interrupts. So
> > > >>>> these cannot be numbered from 0. So when 0x4 (INTD) for a
> > > >>>> multi-function device comes the crash occurs.
> > > >>>
> > > >>> So who provides this hwirq? Who calls irq_domain_associate()
> > > >>> with hwirq set to 4?
> > > >>>
> > > >> PCIe subsystem invokes pcibios_add_device function in
> arch/arm64/kernel/pci.c for every pci device.
> > > >> The purpose of this function is to assign dev->irq using
> of_irq_parse_and_map_pci.
> > > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads
> > > >> PCI_INTERRUPT_PIN from configuration space and saves it in parameter
> of struct of_phandle_args.
> > > >> This structure is passed to irq_create_of_mapping where it invokes
> irq_create_fwspec_mapping.
> > > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets
> > > >> hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned to
> hwirq (*hwirq = fwspec->param[0]).
> > > >> And then using this hwirq irq_create_mapping -> irq_domain_associate
> were invoked and mapping is created for virtual irq with this hwirq.
> > > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4
> and so hwirq starts from 0x1 to 0x4.
> > > >>
> > > >> So the values are more generic w.r.t to protocol, that's why hwirq will
> range from 0x1 to 0x4.
> > > >> And then if you check pcie-altera.c they are doing this adding one in their
> handler and while creating legacy domain.
> > > >
> > > > Is this resolved yet?  Marc, are you happy, or should we iterate
> > > > on this again?
> > >
> > > Ah, sorry to have dropped the ball on this patch.
> >
> > No problem, I wasn't making forward progress anyway.
> >
> > > I guess that given that the infrastructure imposes the hwirq range
> > > on the host drivers, Bharat's approach is the only way (and a number
> > > of other host drivers are already slightly broken). I'll try and
> > > have a look at solving this at the generic level. In the meantime:
> > >
> > > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> >
> > After looking at this myself, I'm not happy with this either.  It
> > feels like there are bugs lurking here and we're just hiding one of them.
> >
> > Here are the callers of irq_domain_add_linear() for legacy INTx in
> > drivers/pci/host:
> >
> >   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
> >   dra7xx_pcie_init_irq_domain  4
> >   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
> >   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
> >   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
> >   xilinx_pcie_init_irq_domain  4
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they only
> request 4 IRQs and only INTA, INTB, and INTC work?
> 
Hi Bjorn,

xilinx (non-NWL) will also work with all 4 INTA, INTB, INTc and INTD, but I haven't sent patches
for this yet, because by that time marc has already raised issue regarding the fix w.r.t this. So
once a proper fix was agreed upon for Xilinx-nwl, I will send fix for this also.

Thanks & Regards,
Bharat

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-14  5:34                         ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:34 UTC (permalink / raw)
  To: Bjorn Helgaas, Marc Zyngier
  Cc: robh, bhelgaas, colin.king, Soren Brinkmann, Michal Simek, arnd,
	linux-arm-kernel, linux-pci, linux-kernel, Ravikiran Gummaluri,
	Ley Foon Tan, Thomas Petazzoni, Kishon Vijay Abraham I,
	Murali Karicheri

> [+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keyst=
one)]
>=20
> On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
> > On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote=
:
> > > >>>>>>> Hi Bharat,
> > > >>>>>>>> @@ -561,7 +561,7 @@ static int
> > > >>>>>>>> nwl_pcie_init_irq_domain(struct nwl_pcie
> > > >>>>>>> *pcie)
> > > >>>>>>>>     }
> > > >>>>>>>>
> > > >>>>>>>>     pcie->legacy_irq_domain =3D
> irq_domain_add_linear(legacy_intc_node,
> > > >>>>>>>> -                                                   INTX_NUM=
,
> > > >>>>>>>> +
> > > >>>>>>>> + INTX_NUM + 1,
> > > >>>>>>>>                                                     &legacy_=
domain_ops,
> > > >>>>>>>>                                                     pcie);
> > > >>>>>>>
> > > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM
> > > >>>>>>> irqs, so the domain allocation should reflect this. On the
> > > >>>>>>> other hand, the way the driver currently deals with mappings
> > > >>>>>>> is quite broken (consistently adding 1 to
> > > >>>>> the HW interrupt).
> > > >>>>>>>
> > > >>>>>> Hi Marc,
> > > >>>>>>
> > > >>>>>> Without above change I get following crash in kernel while boo=
ting.
> > > >>>>>>
> > > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > > >>>>>>
> > > >>>>>> [    2.441694] ------------[ cut here ]------------
> > > >>>>>>
> > > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > > >>>>>>
> > > >>>>>> [    2.441702] Modules linked in:
> > > >>>>>>
> > > >>>>>> [    2.441706]
> > > >>>>>>
> > > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0=
 #8
> > > >>>>>>
> > > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > > >>>>>>
> > > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 tas=
k.ti:
> > > >>>>> ffffffc071888000
> > > >>>>>>
> > > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > > >>>>>>
> > > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > > >>>>>>
> > > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > > >>>>>>
> > > >>>>>> if (WARN(hwirq >=3D domain->hwirq_max,
> > > >>>>>>                  "error: hwirq 0x%x is too large for %s\n",
> > > >>>>>> (int)hwirq, domain-
> > > >>>> name))
> > > >>>>>>                 return -EINVAL;
> > > >>>>>>
> > > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > > >>>>>> condition
> > > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > > >>>>>> This is happening as the legacy interrupts are starting from 1=
 (INTA).
> > > >>>>>
> > > >>>>> I understood that. I'm still persisting in saying that you have=
 the wrong
> fix.
> > > >>>>>
> > > >>>>> Your domain should always allocate many interrupts as you have
> > > >>>>> interrupt sources. These interrupts (hwirq) should be numbered
> > > >>>>> from 0 to (n-
> > > >>> 1).
> > > >>>>
> > > >>>> Agreed, but here comes the problem the hwirq for legacy
> > > >>>> interrupts will start at 0x1 to 0x4 (INTA to INTD) and these
> > > >>>> values are as per PCIe specification for legacy interrupts. So
> > > >>>> these cannot be numbered from 0. So when 0x4 (INTD) for a
> > > >>>> multi-function device comes the crash occurs.
> > > >>>
> > > >>> So who provides this hwirq? Who calls irq_domain_associate()
> > > >>> with hwirq set to 4?
> > > >>>
> > > >> PCIe subsystem invokes pcibios_add_device function in
> arch/arm64/kernel/pci.c for every pci device.
> > > >> The purpose of this function is to assign dev->irq using
> of_irq_parse_and_map_pci.
> > > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads
> > > >> PCI_INTERRUPT_PIN from configuration space and saves it in paramet=
er
> of struct of_phandle_args.
> > > >> This structure is passed to irq_create_of_mapping where it invokes
> irq_create_fwspec_mapping.
> > > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets
> > > >> hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned to
> hwirq (*hwirq =3D fwspec->param[0]).
> > > >> And then using this hwirq irq_create_mapping -> irq_domain_associa=
te
> were invoked and mapping is created for virtual irq with this hwirq.
> > > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x=
4
> and so hwirq starts from 0x1 to 0x4.
> > > >>
> > > >> So the values are more generic w.r.t to protocol, that's why hwirq=
 will
> range from 0x1 to 0x4.
> > > >> And then if you check pcie-altera.c they are doing this adding one=
 in their
> handler and while creating legacy domain.
> > > >
> > > > Is this resolved yet?  Marc, are you happy, or should we iterate
> > > > on this again?
> > >
> > > Ah, sorry to have dropped the ball on this patch.
> >
> > No problem, I wasn't making forward progress anyway.
> >
> > > I guess that given that the infrastructure imposes the hwirq range
> > > on the host drivers, Bharat's approach is the only way (and a number
> > > of other host drivers are already slightly broken). I'll try and
> > > have a look at solving this at the generic level. In the meantime:
> > >
> > > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> >
> > After looking at this myself, I'm not happy with this either.  It
> > feels like there are bugs lurking here and we're just hiding one of the=
m.
> >
> > Here are the callers of irq_domain_add_linear() for legacy INTx in
> > drivers/pci/host:
> >
> >   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
> >   dra7xx_pcie_init_irq_domain  4
> >   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
> >   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
> >   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
> >   xilinx_pcie_init_irq_domain  4
>=20
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: F=
ix
> error when INTx is 4").  I should have noticed this inconsistency back th=
en.
>=20
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they =
only
> request 4 IRQs and only INTA, INTB, and INTC work?
>=20
Hi Bjorn,

xilinx (non-NWL) will also work with all 4 INTA, INTB, INTc and INTD, but I=
 haven't sent patches
for this yet, because by that time marc has already raised issue regarding =
the fix w.r.t this. So
once a proper fix was agreed upon for Xilinx-nwl, I will send fix for this =
also.

Thanks & Regards,
Bharat

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-14  5:34                         ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2016-09-14  5:34 UTC (permalink / raw)
  To: linux-arm-kernel

> [+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]
> 
> On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
> > On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > > >>>>>>> Hi Bharat,
> > > >>>>>>>> @@ -561,7 +561,7 @@ static int
> > > >>>>>>>> nwl_pcie_init_irq_domain(struct nwl_pcie
> > > >>>>>>> *pcie)
> > > >>>>>>>>     }
> > > >>>>>>>>
> > > >>>>>>>>     pcie->legacy_irq_domain =
> irq_domain_add_linear(legacy_intc_node,
> > > >>>>>>>> -                                                   INTX_NUM,
> > > >>>>>>>> +
> > > >>>>>>>> + INTX_NUM + 1,
> > > >>>>>>>>                                                     &legacy_domain_ops,
> > > >>>>>>>>                                                     pcie);
> > > >>>>>>>
> > > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM
> > > >>>>>>> irqs, so the domain allocation should reflect this. On the
> > > >>>>>>> other hand, the way the driver currently deals with mappings
> > > >>>>>>> is quite broken (consistently adding 1 to
> > > >>>>> the HW interrupt).
> > > >>>>>>>
> > > >>>>>> Hi Marc,
> > > >>>>>>
> > > >>>>>> Without above change I get following crash in kernel while booting.
> > > >>>>>>
> > > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > > >>>>>>
> > > >>>>>> [    2.441694] ------------[ cut here ]------------
> > > >>>>>>
> > > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > > >>>>>>
> > > >>>>>> [    2.441702] Modules linked in:
> > > >>>>>>
> > > >>>>>> [    2.441706]
> > > >>>>>>
> > > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > > >>>>>>
> > > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > > >>>>>>
> > > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > > >>>>> ffffffc071888000
> > > >>>>>>
> > > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > > >>>>>>
> > > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > > >>>>>>
> > > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > > >>>>>>
> > > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > > >>>>>>                  "error: hwirq 0x%x is too large for %s\n",
> > > >>>>>> (int)hwirq, domain-
> > > >>>> name))
> > > >>>>>>                 return -EINVAL;
> > > >>>>>>
> > > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > > >>>>>> condition
> > > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > > >>>>>
> > > >>>>> I understood that. I'm still persisting in saying that you have the wrong
> fix.
> > > >>>>>
> > > >>>>> Your domain should always allocate many interrupts as you have
> > > >>>>> interrupt sources. These interrupts (hwirq) should be numbered
> > > >>>>> from 0 to (n-
> > > >>> 1).
> > > >>>>
> > > >>>> Agreed, but here comes the problem the hwirq for legacy
> > > >>>> interrupts will start at 0x1 to 0x4 (INTA to INTD) and these
> > > >>>> values are as per PCIe specification for legacy interrupts. So
> > > >>>> these cannot be numbered from 0. So when 0x4 (INTD) for a
> > > >>>> multi-function device comes the crash occurs.
> > > >>>
> > > >>> So who provides this hwirq? Who calls irq_domain_associate()
> > > >>> with hwirq set to 4?
> > > >>>
> > > >> PCIe subsystem invokes pcibios_add_device function in
> arch/arm64/kernel/pci.c for every pci device.
> > > >> The purpose of this function is to assign dev->irq using
> of_irq_parse_and_map_pci.
> > > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads
> > > >> PCI_INTERRUPT_PIN from configuration space and saves it in parameter
> of struct of_phandle_args.
> > > >> This structure is passed to irq_create_of_mapping where it invokes
> irq_create_fwspec_mapping.
> > > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets
> > > >> hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned to
> hwirq (*hwirq = fwspec->param[0]).
> > > >> And then using this hwirq irq_create_mapping -> irq_domain_associate
> were invoked and mapping is created for virtual irq with this hwirq.
> > > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4
> and so hwirq starts from 0x1 to 0x4.
> > > >>
> > > >> So the values are more generic w.r.t to protocol, that's why hwirq will
> range from 0x1 to 0x4.
> > > >> And then if you check pcie-altera.c they are doing this adding one in their
> handler and while creating legacy domain.
> > > >
> > > > Is this resolved yet?  Marc, are you happy, or should we iterate
> > > > on this again?
> > >
> > > Ah, sorry to have dropped the ball on this patch.
> >
> > No problem, I wasn't making forward progress anyway.
> >
> > > I guess that given that the infrastructure imposes the hwirq range
> > > on the host drivers, Bharat's approach is the only way (and a number
> > > of other host drivers are already slightly broken). I'll try and
> > > have a look at solving this at the generic level. In the meantime:
> > >
> > > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> >
> > After looking at this myself, I'm not happy with this either.  It
> > feels like there are bugs lurking here and we're just hiding one of them.
> >
> > Here are the callers of irq_domain_add_linear() for legacy INTx in
> > drivers/pci/host:
> >
> >   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
> >   dra7xx_pcie_init_irq_domain  4
> >   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
> >   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
> >   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
> >   xilinx_pcie_init_irq_domain  4
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they only
> request 4 IRQs and only INTA, INTB, and INTC work?
> 
Hi Bjorn,

xilinx (non-NWL) will also work with all 4 INTA, INTB, INTc and INTD, but I haven't sent patches
for this yet, because by that time marc has already raised issue regarding the fix w.r.t this. So
once a proper fix was agreed upon for Xilinx-nwl, I will send fix for this also.

Thanks & Regards,
Bharat

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-13 15:34                       ` Bjorn Helgaas
  (?)
@ 2016-09-14  9:55                         ` Kishon Vijay Abraham I
  -1 siblings, 0 replies; 58+ messages in thread
From: Kishon Vijay Abraham I @ 2016-09-14  9:55 UTC (permalink / raw)
  To: Bjorn Helgaas, Marc Zyngier
  Cc: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri, Ley Foon Tan, Thomas Petazzoni,
	Murali Karicheri

Hi,

On Tuesday 13 September 2016 09:04 PM, Bjorn Helgaas wrote:
> [+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]
> 
> On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
>> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
>>> On 12/09/16 23:02, Bjorn Helgaas wrote:
>>>> On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
>>>>>>>>>> Hi Bharat,
>>>>>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>>>>>>>> nwl_pcie
>>>>>>>>>> *pcie)
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>>>>>>>> -                                                   INTX_NUM,
>>>>>>>>>>> +                                                   INTX_NUM + 1,
>>>>>>>>>>>                                                     &legacy_domain_ops,
>>>>>>>>>>>                                                     pcie);
>>>>>>>>>>
>>>>>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
>>>>>>>>>> the domain allocation should reflect this. On the other hand, the
>>>>>>>>>> way the driver currently deals with mappings is quite broken
>>>>>>>>>> (consistently adding 1 to
>>>>>>>> the HW interrupt).
>>>>>>>>>>
>>>>>>>>> Hi Marc,
>>>>>>>>>
>>>>>>>>> Without above change I get following crash in kernel while booting.
>>>>>>>>>
>>>>>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>>>>>>>
>>>>>>>>> [    2.441694] ------------[ cut here ]------------
>>>>>>>>>
>>>>>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>>>>>>>
>>>>>>>>> [    2.441702] Modules linked in:
>>>>>>>>>
>>>>>>>>> [    2.441706]
>>>>>>>>>
>>>>>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>>>>>>>
>>>>>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>>>>>>>
>>>>>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>>>>>>>> ffffffc071888000
>>>>>>>>>
>>>>>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>>>>>>>
>>>>>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>>>>>>>
>>>>>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>>>>>>>
>>>>>>>>> if (WARN(hwirq >= domain->hwirq_max,
>>>>>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
>>>>>>> name))
>>>>>>>>>                 return -EINVAL;
>>>>>>>>>
>>>>>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
>>>>>>>>> condition
>>>>>>>> (INTX_NUM + 1) due to which crash is coming.
>>>>>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>>>>>>>
>>>>>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>>>>>>>
>>>>>>>> Your domain should always allocate many interrupts as you have
>>>>>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
>>>>>> 1).
>>>>>>>
>>>>>>> Agreed, but here comes the problem the hwirq for legacy interrupts
>>>>>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
>>>>>>> PCIe specification for legacy interrupts. So these cannot be numbered
>>>>>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
>>>>>>> occurs.
>>>>>>
>>>>>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
>>>>>> 4?
>>>>>>
>>>>> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
>>>>> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
>>>>> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
>>>>> in parameter of struct of_phandle_args.
>>>>> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
>>>>> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
>>>>> to hwirq (*hwirq = fwspec->param[0]).
>>>>> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
>>>>> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
>>>>>
>>>>> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
>>>>> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
>>>>
>>>> Is this resolved yet?  Marc, are you happy, or should we iterate on this
>>>> again?
>>>
>>> Ah, sorry to have dropped the ball on this patch.
>>
>> No problem, I wasn't making forward progress anyway.
>>
>>> I guess that given that the infrastructure imposes the hwirq range on
>>> the host drivers, Bharat's approach is the only way (and a number of
>>> other host drivers are already slightly broken). I'll try and have a
>>> look at solving this at the generic level. In the meantime:
>>>
>>> Acked-by: Marc Zyngier <marc.zyngier@arm.com>
>>
>> After looking at this myself, I'm not happy with this either.  It feels
>> like there are bugs lurking here and we're just hiding one of them.
>>
>> Here are the callers of irq_domain_add_linear() for legacy INTx in
>> drivers/pci/host:
>>
>>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>>   dra7xx_pcie_init_irq_domain  4
>>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>>   xilinx_pcie_init_irq_domain  4
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back
> then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
> only request 4 IRQs and only INTA, INTB, and INTC work?

yeah.. it's broken in dra7xx. I get [1] when I configure the pci endpoint to
use INTD.

Thanks
Kishon

[1] -> http://pastebin.ubuntu.com/23177268/
> 
>> I think all of these use the of_irq_parse_and_map_pci() path you
>> mentioned, so if the problem is in the way that path works, I would
>> think these should *all* be requesting the same number of interrupts
>> in the domain.
>>
>> I agree with Marc that we should request 4 IRQs, because that's what
>> we need.  If we can't do that for some reason, we ought to at least
>> make all these callers the same.
>>
>> Bjorn
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-14  9:55                         ` Kishon Vijay Abraham I
  0 siblings, 0 replies; 58+ messages in thread
From: Kishon Vijay Abraham I @ 2016-09-14  9:55 UTC (permalink / raw)
  To: Bjorn Helgaas, Marc Zyngier
  Cc: Bharat Kumar Gogada, robh, bhelgaas, colin.king, Soren Brinkmann,
	Michal Simek, arnd, linux-arm-kernel, linux-pci, linux-kernel,
	Ravikiran Gummaluri, Ley Foon Tan, Thomas Petazzoni,
	Murali Karicheri

Hi,

On Tuesday 13 September 2016 09:04 PM, Bjorn Helgaas wrote:
> [+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]
> 
> On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
>> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
>>> On 12/09/16 23:02, Bjorn Helgaas wrote:
>>>> On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
>>>>>>>>>> Hi Bharat,
>>>>>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>>>>>>>> nwl_pcie
>>>>>>>>>> *pcie)
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>>>>>>>> -                                                   INTX_NUM,
>>>>>>>>>>> +                                                   INTX_NUM + 1,
>>>>>>>>>>>                                                     &legacy_domain_ops,
>>>>>>>>>>>                                                     pcie);
>>>>>>>>>>
>>>>>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
>>>>>>>>>> the domain allocation should reflect this. On the other hand, the
>>>>>>>>>> way the driver currently deals with mappings is quite broken
>>>>>>>>>> (consistently adding 1 to
>>>>>>>> the HW interrupt).
>>>>>>>>>>
>>>>>>>>> Hi Marc,
>>>>>>>>>
>>>>>>>>> Without above change I get following crash in kernel while booting.
>>>>>>>>>
>>>>>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>>>>>>>
>>>>>>>>> [    2.441694] ------------[ cut here ]------------
>>>>>>>>>
>>>>>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>>>>>>>
>>>>>>>>> [    2.441702] Modules linked in:
>>>>>>>>>
>>>>>>>>> [    2.441706]
>>>>>>>>>
>>>>>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>>>>>>>
>>>>>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>>>>>>>
>>>>>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>>>>>>>> ffffffc071888000
>>>>>>>>>
>>>>>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>>>>>>>
>>>>>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>>>>>>>
>>>>>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>>>>>>>
>>>>>>>>> if (WARN(hwirq >= domain->hwirq_max,
>>>>>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
>>>>>>> name))
>>>>>>>>>                 return -EINVAL;
>>>>>>>>>
>>>>>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
>>>>>>>>> condition
>>>>>>>> (INTX_NUM + 1) due to which crash is coming.
>>>>>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>>>>>>>
>>>>>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>>>>>>>
>>>>>>>> Your domain should always allocate many interrupts as you have
>>>>>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
>>>>>> 1).
>>>>>>>
>>>>>>> Agreed, but here comes the problem the hwirq for legacy interrupts
>>>>>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
>>>>>>> PCIe specification for legacy interrupts. So these cannot be numbered
>>>>>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
>>>>>>> occurs.
>>>>>>
>>>>>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
>>>>>> 4?
>>>>>>
>>>>> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
>>>>> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
>>>>> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
>>>>> in parameter of struct of_phandle_args.
>>>>> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
>>>>> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
>>>>> to hwirq (*hwirq = fwspec->param[0]).
>>>>> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
>>>>> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
>>>>>
>>>>> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
>>>>> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
>>>>
>>>> Is this resolved yet?  Marc, are you happy, or should we iterate on this
>>>> again?
>>>
>>> Ah, sorry to have dropped the ball on this patch.
>>
>> No problem, I wasn't making forward progress anyway.
>>
>>> I guess that given that the infrastructure imposes the hwirq range on
>>> the host drivers, Bharat's approach is the only way (and a number of
>>> other host drivers are already slightly broken). I'll try and have a
>>> look at solving this at the generic level. In the meantime:
>>>
>>> Acked-by: Marc Zyngier <marc.zyngier@arm.com>
>>
>> After looking at this myself, I'm not happy with this either.  It feels
>> like there are bugs lurking here and we're just hiding one of them.
>>
>> Here are the callers of irq_domain_add_linear() for legacy INTx in
>> drivers/pci/host:
>>
>>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>>   dra7xx_pcie_init_irq_domain  4
>>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>>   xilinx_pcie_init_irq_domain  4
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back
> then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
> only request 4 IRQs and only INTA, INTB, and INTC work?

yeah.. it's broken in dra7xx. I get [1] when I configure the pci endpoint to
use INTD.

Thanks
Kishon

[1] -> http://pastebin.ubuntu.com/23177268/
> 
>> I think all of these use the of_irq_parse_and_map_pci() path you
>> mentioned, so if the problem is in the way that path works, I would
>> think these should *all* be requesting the same number of interrupts
>> in the domain.
>>
>> I agree with Marc that we should request 4 IRQs, because that's what
>> we need.  If we can't do that for some reason, we ought to at least
>> make all these callers the same.
>>
>> Bjorn
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2016-09-14  9:55                         ` Kishon Vijay Abraham I
  0 siblings, 0 replies; 58+ messages in thread
From: Kishon Vijay Abraham I @ 2016-09-14  9:55 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Tuesday 13 September 2016 09:04 PM, Bjorn Helgaas wrote:
> [+cc Ley Foon (altera), Thomas (aardvark), Kishon (dra7xx), Murali (keystone)]
> 
> On Tue, Sep 13, 2016 at 10:05:11AM -0500, Bjorn Helgaas wrote:
>> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
>>> On 12/09/16 23:02, Bjorn Helgaas wrote:
>>>> On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
>>>>>>>>>> Hi Bharat,
>>>>>>>>>>> @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct
>>>>>>>>>>> nwl_pcie
>>>>>>>>>> *pcie)
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>>     pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
>>>>>>>>>>> -                                                   INTX_NUM,
>>>>>>>>>>> +                                                   INTX_NUM + 1,
>>>>>>>>>>>                                                     &legacy_domain_ops,
>>>>>>>>>>>                                                     pcie);
>>>>>>>>>>
>>>>>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs, so
>>>>>>>>>> the domain allocation should reflect this. On the other hand, the
>>>>>>>>>> way the driver currently deals with mappings is quite broken
>>>>>>>>>> (consistently adding 1 to
>>>>>>>> the HW interrupt).
>>>>>>>>>>
>>>>>>>>> Hi Marc,
>>>>>>>>>
>>>>>>>>> Without above change I get following crash in kernel while booting.
>>>>>>>>>
>>>>>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
>>>>>>>>>
>>>>>>>>> [    2.441694] ------------[ cut here ]------------
>>>>>>>>>
>>>>>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
>>>>>>>>>
>>>>>>>>> [    2.441702] Modules linked in:
>>>>>>>>>
>>>>>>>>> [    2.441706]
>>>>>>>>>
>>>>>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
>>>>>>>>>
>>>>>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
>>>>>>>>>
>>>>>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
>>>>>>>> ffffffc071888000
>>>>>>>>>
>>>>>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
>>>>>>>>>
>>>>>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
>>>>>>>>>
>>>>>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
>>>>>>>>>
>>>>>>>>> if (WARN(hwirq >= domain->hwirq_max,
>>>>>>>>>                  "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain-
>>>>>>> name))
>>>>>>>>>                 return -EINVAL;
>>>>>>>>>
>>>>>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
>>>>>>>>> condition
>>>>>>>> (INTX_NUM + 1) due to which crash is coming.
>>>>>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
>>>>>>>>
>>>>>>>> I understood that. I'm still persisting in saying that you have the wrong fix.
>>>>>>>>
>>>>>>>> Your domain should always allocate many interrupts as you have
>>>>>>>> interrupt sources. These interrupts (hwirq) should be numbered from 0 to (n-
>>>>>> 1).
>>>>>>>
>>>>>>> Agreed, but here comes the problem the hwirq for legacy interrupts
>>>>>>> will start at 0x1 to 0x4 (INTA to INTD) and these values are as per
>>>>>>> PCIe specification for legacy interrupts. So these cannot be numbered
>>>>>>> from 0. So when 0x4 (INTD) for a multi-function device comes the crash
>>>>>>> occurs.
>>>>>>
>>>>>> So who provides this hwirq? Who calls irq_domain_associate() with hwirq set to
>>>>>> 4?
>>>>>>
>>>>> PCIe subsystem invokes pcibios_add_device function in arch/arm64/kernel/pci.c for every pci device.
>>>>> The purpose of this function is to assign dev->irq using of_irq_parse_and_map_pci.
>>>>> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads PCI_INTERRUPT_PIN from configuration space and saves it
>>>>> in parameter of struct of_phandle_args.
>>>>> This structure is passed to irq_create_of_mapping where it invokes irq_create_fwspec_mapping.
>>>>> irq_create_fwspec_mapping invokes irq_domain_translate and gets hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned 
>>>>> to hwirq (*hwirq = fwspec->param[0]).
>>>>> And then using this hwirq irq_create_mapping -> irq_domain_associate were invoked and mapping is created for virtual irq with this hwirq.
>>>>> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and so hwirq starts from 0x1 to 0x4.
>>>>>
>>>>> So the values are more generic w.r.t to protocol, that's why hwirq will range from 0x1 to 0x4. 
>>>>> And then if you check pcie-altera.c they are doing this adding one in their handler and while creating legacy domain.
>>>>
>>>> Is this resolved yet?  Marc, are you happy, or should we iterate on this
>>>> again?
>>>
>>> Ah, sorry to have dropped the ball on this patch.
>>
>> No problem, I wasn't making forward progress anyway.
>>
>>> I guess that given that the infrastructure imposes the hwirq range on
>>> the host drivers, Bharat's approach is the only way (and a number of
>>> other host drivers are already slightly broken). I'll try and have a
>>> look at solving this at the generic level. In the meantime:
>>>
>>> Acked-by: Marc Zyngier <marc.zyngier@arm.com>
>>
>> After looking at this myself, I'm not happy with this either.  It feels
>> like there are bugs lurking here and we're just hiding one of them.
>>
>> Here are the callers of irq_domain_add_linear() for legacy INTx in
>> drivers/pci/host:
>>
>>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>>   dra7xx_pcie_init_irq_domain  4
>>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>>   xilinx_pcie_init_irq_domain  4
> 
> The altera change corresponding to this was 99496bd2971f ("PCI: altera: Fix
> error when INTx is 4").  I should have noticed this inconsistency back
> then.
> 
> Are aardvark, dra7xx, keystone, and xilinx (non-NWL) broken because they
> only request 4 IRQs and only INTA, INTB, and INTC work?

yeah.. it's broken in dra7xx. I get [1] when I configure the pci endpoint to
use INTD.

Thanks
Kishon

[1] -> http://pastebin.ubuntu.com/23177268/
> 
>> I think all of these use the of_irq_parse_and_map_pci() path you
>> mentioned, so if the problem is in the way that path works, I would
>> think these should *all* be requesting the same number of interrupts
>> in the domain.
>>
>> I agree with Marc that we should request 4 IRQs, because that's what
>> we need.  If we can't do that for some reason, we ought to at least
>> make all these callers the same.
>>
>> Bjorn
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo at vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
  2016-09-13 15:05                     ` Bjorn Helgaas
  (?)
@ 2017-03-02  8:46                       ` Bharat Kumar Gogada
  -1 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2017-03-02  8:46 UTC (permalink / raw)
  To: Bjorn Helgaas, Marc Zyngier
  Cc: robh, bhelgaas, colin.king, arnd, linux-arm-kernel, linux-pci,
	linux-kernel

Hi,

Any one is working on fix for this issue ? 

Regards,
Bharat

> -----Original Message-----
> From: Bjorn Helgaas [mailto:helgaas@kernel.org]
> Sent: Tuesday, September 13, 2016 8:35 PM
> To: Marc Zyngier <marc.zyngier@arm.com>
> Cc: Bharat Kumar Gogada <bharatku@xilinx.com>; robh@kernel.org;
> bhelgaas@google.com; colin.king@canonical.com; Soren Brinkmann
> <sorenb@xilinx.com>; Michal Simek <michals@xilinx.com>; arnd@arndb.de;
> linux-arm-kernel@lists.infradead.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Ravikiran Gummaluri <rgummal@xilinx.com>
> Subject: Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device
> for legacy interrupts.
> 
> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>>>>> Hi Bharat,
> > >>>>>>>> @@ -561,7 +561,7 @@ static int
> > >>>>>>>> nwl_pcie_init_irq_domain(struct nwl_pcie
> > >>>>>>> *pcie)
> > >>>>>>>>     }
> > >>>>>>>>
> > >>>>>>>>     pcie->legacy_irq_domain =
> irq_domain_add_linear(legacy_intc_node,
> > >>>>>>>> -                                                   INTX_NUM,
> > >>>>>>>> +                                                   INTX_NUM
> > >>>>>>>> + + 1,
> > >>>>>>>>                                                     &legacy_domain_ops,
> > >>>>>>>>                                                     pcie);
> > >>>>>>>
> > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs,
> > >>>>>>> so the domain allocation should reflect this. On the other
> > >>>>>>> hand, the way the driver currently deals with mappings is
> > >>>>>>> quite broken (consistently adding 1 to
> > >>>>> the HW interrupt).
> > >>>>>>>
> > >>>>>> Hi Marc,
> > >>>>>>
> > >>>>>> Without above change I get following crash in kernel while booting.
> > >>>>>>
> > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>>>>
> > >>>>>> [    2.441694] ------------[ cut here ]------------
> > >>>>>>
> > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>>>>
> > >>>>>> [    2.441702] Modules linked in:
> > >>>>>>
> > >>>>>> [    2.441706]
> > >>>>>>
> > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>>>>
> > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>>>>
> > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >>>>> ffffffc071888000
> > >>>>>>
> > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>>>>
> > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>>>>                  "error: hwirq 0x%x is too large for %s\n",
> > >>>>>> (int)hwirq, domain-
> > >>>> name))
> > >>>>>>                 return -EINVAL;
> > >>>>>>
> > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>>>>> condition
> > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>>>>
> > >>>>> I understood that. I'm still persisting in saying that you have the wrong
> fix.
> > >>>>>
> > >>>>> Your domain should always allocate many interrupts as you have
> > >>>>> interrupt sources. These interrupts (hwirq) should be numbered
> > >>>>> from 0 to (n-
> > >>> 1).
> > >>>>
> > >>>> Agreed, but here comes the problem the hwirq for legacy
> > >>>> interrupts will start at 0x1 to 0x4 (INTA to INTD) and these
> > >>>> values are as per PCIe specification for legacy interrupts. So
> > >>>> these cannot be numbered from 0. So when 0x4 (INTD) for a
> > >>>> multi-function device comes the crash occurs.
> > >>>
> > >>> So who provides this hwirq? Who calls irq_domain_associate() with
> > >>> hwirq set to 4?
> > >>>
> > >> PCIe subsystem invokes pcibios_add_device function in
> arch/arm64/kernel/pci.c for every pci device.
> > >> The purpose of this function is to assign dev->irq using
> of_irq_parse_and_map_pci.
> > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads
> > >> PCI_INTERRUPT_PIN from configuration space and saves it in parameter of
> struct of_phandle_args.
> > >> This structure is passed to irq_create_of_mapping where it invokes
> irq_create_fwspec_mapping.
> > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets
> > >> hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned to hwirq
> (*hwirq = fwspec->param[0]).
> > >> And then using this hwirq irq_create_mapping -> irq_domain_associate
> were invoked and mapping is created for virtual irq with this hwirq.
> > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and
> so hwirq starts from 0x1 to 0x4.
> > >>
> > >> So the values are more generic w.r.t to protocol, that's why hwirq will
> range from 0x1 to 0x4.
> > >> And then if you check pcie-altera.c they are doing this adding one in their
> handler and while creating legacy domain.
> > >
> > > Is this resolved yet?  Marc, are you happy, or should we iterate on
> > > this again?
> >
> > Ah, sorry to have dropped the ball on this patch.
> 
> No problem, I wasn't making forward progress anyway.
> 
> > I guess that given that the infrastructure imposes the hwirq range on
> > the host drivers, Bharat's approach is the only way (and a number of
> > other host drivers are already slightly broken). I'll try and have a
> > look at solving this at the generic level. In the meantime:
> >
> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> 
> After looking at this myself, I'm not happy with this either.  It feels like there are
> bugs lurking here and we're just hiding one of them.
> 
> Here are the callers of irq_domain_add_linear() for legacy INTx in
> drivers/pci/host:
> 
>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>   dra7xx_pcie_init_irq_domain  4
>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>   xilinx_pcie_init_irq_domain  4
> 
> I think all of these use the of_irq_parse_and_map_pci() path you mentioned, so
> if the problem is in the way that path works, I would think these should *all* be
> requesting the same number of interrupts in the domain.
> 
> I agree with Marc that we should request 4 IRQs, because that's what we need.
> If we can't do that for some reason, we ought to at least make all these callers
> the same.
> 
> Bjorn

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2017-03-02  8:46                       ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2017-03-02  8:46 UTC (permalink / raw)
  To: Bjorn Helgaas, Marc Zyngier
  Cc: robh, bhelgaas, colin.king, arnd, linux-arm-kernel, linux-pci,
	linux-kernel

Hi,

Any one is working on fix for this issue ?=20

Regards,
Bharat

> -----Original Message-----
> From: Bjorn Helgaas [mailto:helgaas@kernel.org]
> Sent: Tuesday, September 13, 2016 8:35 PM
> To: Marc Zyngier <marc.zyngier@arm.com>
> Cc: Bharat Kumar Gogada <bharatku@xilinx.com>; robh@kernel.org;
> bhelgaas@google.com; colin.king@canonical.com; Soren Brinkmann
> <sorenb@xilinx.com>; Michal Simek <michals@xilinx.com>; arnd@arndb.de;
> linux-arm-kernel@lists.infradead.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Ravikiran Gummaluri <rgummal@xilinx.com>
> Subject: Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi functi=
on device
> for legacy interrupts.
>=20
> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>>>>> Hi Bharat,
> > >>>>>>>> @@ -561,7 +561,7 @@ static int
> > >>>>>>>> nwl_pcie_init_irq_domain(struct nwl_pcie
> > >>>>>>> *pcie)
> > >>>>>>>>     }
> > >>>>>>>>
> > >>>>>>>>     pcie->legacy_irq_domain =3D
> irq_domain_add_linear(legacy_intc_node,
> > >>>>>>>> -                                                   INTX_NUM,
> > >>>>>>>> +                                                   INTX_NUM
> > >>>>>>>> + + 1,
> > >>>>>>>>                                                     &legacy_do=
main_ops,
> > >>>>>>>>                                                     pcie);
> > >>>>>>>
> > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs,
> > >>>>>>> so the domain allocation should reflect this. On the other
> > >>>>>>> hand, the way the driver currently deals with mappings is
> > >>>>>>> quite broken (consistently adding 1 to
> > >>>>> the HW interrupt).
> > >>>>>>>
> > >>>>>> Hi Marc,
> > >>>>>>
> > >>>>>> Without above change I get following crash in kernel while booti=
ng.
> > >>>>>>
> > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>>>>
> > >>>>>> [    2.441694] ------------[ cut here ]------------
> > >>>>>>
> > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>>>>
> > >>>>>> [    2.441702] Modules linked in:
> > >>>>>>
> > >>>>>> [    2.441706]
> > >>>>>>
> > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #=
8
> > >>>>>>
> > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>>>>
> > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.=
ti:
> > >>>>> ffffffc071888000
> > >>>>>>
> > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>>>>
> > >>>>>> if (WARN(hwirq >=3D domain->hwirq_max,
> > >>>>>>                  "error: hwirq 0x%x is too large for %s\n",
> > >>>>>> (int)hwirq, domain-
> > >>>> name))
> > >>>>>>                 return -EINVAL;
> > >>>>>>
> > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>>>>> condition
> > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > >>>>>> This is happening as the legacy interrupts are starting from 1 (=
INTA).
> > >>>>>
> > >>>>> I understood that. I'm still persisting in saying that you have t=
he wrong
> fix.
> > >>>>>
> > >>>>> Your domain should always allocate many interrupts as you have
> > >>>>> interrupt sources. These interrupts (hwirq) should be numbered
> > >>>>> from 0 to (n-
> > >>> 1).
> > >>>>
> > >>>> Agreed, but here comes the problem the hwirq for legacy
> > >>>> interrupts will start at 0x1 to 0x4 (INTA to INTD) and these
> > >>>> values are as per PCIe specification for legacy interrupts. So
> > >>>> these cannot be numbered from 0. So when 0x4 (INTD) for a
> > >>>> multi-function device comes the crash occurs.
> > >>>
> > >>> So who provides this hwirq? Who calls irq_domain_associate() with
> > >>> hwirq set to 4?
> > >>>
> > >> PCIe subsystem invokes pcibios_add_device function in
> arch/arm64/kernel/pci.c for every pci device.
> > >> The purpose of this function is to assign dev->irq using
> of_irq_parse_and_map_pci.
> > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads
> > >> PCI_INTERRUPT_PIN from configuration space and saves it in parameter=
 of
> struct of_phandle_args.
> > >> This structure is passed to irq_create_of_mapping where it invokes
> irq_create_fwspec_mapping.
> > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets
> > >> hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned to h=
wirq
> (*hwirq =3D fwspec->param[0]).
> > >> And then using this hwirq irq_create_mapping -> irq_domain_associate
> were invoked and mapping is created for virtual irq with this hwirq.
> > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 =
and
> so hwirq starts from 0x1 to 0x4.
> > >>
> > >> So the values are more generic w.r.t to protocol, that's why hwirq w=
ill
> range from 0x1 to 0x4.
> > >> And then if you check pcie-altera.c they are doing this adding one i=
n their
> handler and while creating legacy domain.
> > >
> > > Is this resolved yet?  Marc, are you happy, or should we iterate on
> > > this again?
> >
> > Ah, sorry to have dropped the ball on this patch.
>=20
> No problem, I wasn't making forward progress anyway.
>=20
> > I guess that given that the infrastructure imposes the hwirq range on
> > the host drivers, Bharat's approach is the only way (and a number of
> > other host drivers are already slightly broken). I'll try and have a
> > look at solving this at the generic level. In the meantime:
> >
> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
>=20
> After looking at this myself, I'm not happy with this either.  It feels l=
ike there are
> bugs lurking here and we're just hiding one of them.
>=20
> Here are the callers of irq_domain_add_linear() for legacy INTx in
> drivers/pci/host:
>=20
>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>   dra7xx_pcie_init_irq_domain  4
>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>   xilinx_pcie_init_irq_domain  4
>=20
> I think all of these use the of_irq_parse_and_map_pci() path you mentione=
d, so
> if the problem is in the way that path works, I would think these should =
*all* be
> requesting the same number of interrupts in the domain.
>=20
> I agree with Marc that we should request 4 IRQs, because that's what we n=
eed.
> If we can't do that for some reason, we ought to at least make all these =
callers
> the same.
>=20
> Bjorn

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts.
@ 2017-03-02  8:46                       ` Bharat Kumar Gogada
  0 siblings, 0 replies; 58+ messages in thread
From: Bharat Kumar Gogada @ 2017-03-02  8:46 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

Any one is working on fix for this issue ? 

Regards,
Bharat

> -----Original Message-----
> From: Bjorn Helgaas [mailto:helgaas at kernel.org]
> Sent: Tuesday, September 13, 2016 8:35 PM
> To: Marc Zyngier <marc.zyngier@arm.com>
> Cc: Bharat Kumar Gogada <bharatku@xilinx.com>; robh at kernel.org;
> bhelgaas at google.com; colin.king at canonical.com; Soren Brinkmann
> <sorenb@xilinx.com>; Michal Simek <michals@xilinx.com>; arnd at arndb.de;
> linux-arm-kernel at lists.infradead.org; linux-pci at vger.kernel.org; linux-
> kernel at vger.kernel.org; Ravikiran Gummaluri <rgummal@xilinx.com>
> Subject: Re: [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device
> for legacy interrupts.
> 
> On Tue, Sep 13, 2016 at 08:41:28AM +0100, Marc Zyngier wrote:
> > On 12/09/16 23:02, Bjorn Helgaas wrote:
> > > On Thu, Sep 01, 2016 at 05:19:55AM +0000, Bharat Kumar Gogada wrote:
> > >>>>>>> Hi Bharat,
> > >>>>>>>> @@ -561,7 +561,7 @@ static int
> > >>>>>>>> nwl_pcie_init_irq_domain(struct nwl_pcie
> > >>>>>>> *pcie)
> > >>>>>>>>     }
> > >>>>>>>>
> > >>>>>>>>     pcie->legacy_irq_domain =
> irq_domain_add_linear(legacy_intc_node,
> > >>>>>>>> -                                                   INTX_NUM,
> > >>>>>>>> +                                                   INTX_NUM
> > >>>>>>>> + + 1,
> > >>>>>>>>                                                     &legacy_domain_ops,
> > >>>>>>>>                                                     pcie);
> > >>>>>>>
> > >>>>>>> This feels like the wrong thing to do. You have INTX_NUM irqs,
> > >>>>>>> so the domain allocation should reflect this. On the other
> > >>>>>>> hand, the way the driver currently deals with mappings is
> > >>>>>>> quite broken (consistently adding 1 to
> > >>>>> the HW interrupt).
> > >>>>>>>
> > >>>>>> Hi Marc,
> > >>>>>>
> > >>>>>> Without above change I get following crash in kernel while booting.
> > >>>>>>
> > >>>>>> [    2.441684] error: hwirq 0x4 is too large for dummy
> > >>>>>>
> > >>>>>> [    2.441694] ------------[ cut here ]------------
> > >>>>>>
> > >>>>>> [    2.441698] WARNING: at kernel/irq/irqdomain.c:344
> > >>>>>>
> > >>>>>> [    2.441702] Modules linked in:
> > >>>>>>
> > >>>>>> [    2.441706]
> > >>>>>>
> > >>>>>> [    2.441714] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.4.0 #8
> > >>>>>>
> > >>>>>> [    2.441718] Hardware name: xlnx,zynqmp (DT)
> > >>>>>>
> > >>>>>> [    2.441723] task: ffffffc071886b80 ti: ffffffc071888000 task.ti:
> > >>>>> ffffffc071888000
> > >>>>>>
> > >>>>>> [    2.441732] PC is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> [    2.441738] LR is at irq_domain_associate+0x138/0x1c0
> > >>>>>>
> > >>>>>> In kernel/irq/irqdomain.c function irq_domain_associate
> > >>>>>>
> > >>>>>> if (WARN(hwirq >= domain->hwirq_max,
> > >>>>>>                  "error: hwirq 0x%x is too large for %s\n",
> > >>>>>> (int)hwirq, domain-
> > >>>> name))
> > >>>>>>                 return -EINVAL;
> > >>>>>>
> > >>>>>> Here the hwirq and hwirq_max are equal to 4 without the above
> > >>>>>> condition
> > >>>>> (INTX_NUM + 1) due to which crash is coming.
> > >>>>>> This is happening as the legacy interrupts are starting from 1 (INTA).
> > >>>>>
> > >>>>> I understood that. I'm still persisting in saying that you have the wrong
> fix.
> > >>>>>
> > >>>>> Your domain should always allocate many interrupts as you have
> > >>>>> interrupt sources. These interrupts (hwirq) should be numbered
> > >>>>> from 0 to (n-
> > >>> 1).
> > >>>>
> > >>>> Agreed, but here comes the problem the hwirq for legacy
> > >>>> interrupts will start at 0x1 to 0x4 (INTA to INTD) and these
> > >>>> values are as per PCIe specification for legacy interrupts. So
> > >>>> these cannot be numbered from 0. So when 0x4 (INTD) for a
> > >>>> multi-function device comes the crash occurs.
> > >>>
> > >>> So who provides this hwirq? Who calls irq_domain_associate() with
> > >>> hwirq set to 4?
> > >>>
> > >> PCIe subsystem invokes pcibios_add_device function in
> arch/arm64/kernel/pci.c for every pci device.
> > >> The purpose of this function is to assign dev->irq using
> of_irq_parse_and_map_pci.
> > >> of_irq_parse_and_map_pci invokes of_irq_parse_pci where it reads
> > >> PCI_INTERRUPT_PIN from configuration space and saves it in parameter of
> struct of_phandle_args.
> > >> This structure is passed to irq_create_of_mapping where it invokes
> irq_create_fwspec_mapping.
> > >> irq_create_fwspec_mapping invokes irq_domain_translate and gets
> > >> hwirq, here the above saved PCI_INTERRUPT_PIN value is assigned to hwirq
> (*hwirq = fwspec->param[0]).
> > >> And then using this hwirq irq_create_mapping -> irq_domain_associate
> were invoked and mapping is created for virtual irq with this hwirq.
> > >> So for any end point PCI_INTERRUPT_PIN value starts from 0x1 to 0x4 and
> so hwirq starts from 0x1 to 0x4.
> > >>
> > >> So the values are more generic w.r.t to protocol, that's why hwirq will
> range from 0x1 to 0x4.
> > >> And then if you check pcie-altera.c they are doing this adding one in their
> handler and while creating legacy domain.
> > >
> > > Is this resolved yet?  Marc, are you happy, or should we iterate on
> > > this again?
> >
> > Ah, sorry to have dropped the ball on this patch.
> 
> No problem, I wasn't making forward progress anyway.
> 
> > I guess that given that the infrastructure imposes the hwirq range on
> > the host drivers, Bharat's approach is the only way (and a number of
> > other host drivers are already slightly broken). I'll try and have a
> > look at solving this at the generic level. In the meantime:
> >
> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> 
> After looking at this myself, I'm not happy with this either.  It feels like there are
> bugs lurking here and we're just hiding one of them.
> 
> Here are the callers of irq_domain_add_linear() for legacy INTx in
> drivers/pci/host:
> 
>   advk_pcie_init_irq_domain    LEGACY_IRQ_NUM   (4)
>   dra7xx_pcie_init_irq_domain  4
>   ks_dw_pcie_host_init         MAX_LEGACY_IRQS  (4)
>   altera_pcie_init_irq_domain  INTX_NUM + 1     (5)
>   nwl_pcie_init_irq_domain     INTX_NUM + 1     (5)
>   xilinx_pcie_init_irq_domain  4
> 
> I think all of these use the of_irq_parse_and_map_pci() path you mentioned, so
> if the problem is in the way that path works, I would think these should *all* be
> requesting the same number of interrupts in the domain.
> 
> I agree with Marc that we should request 4 IRQs, because that's what we need.
> If we can't do that for some reason, we ought to at least make all these callers
> the same.
> 
> Bjorn

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2017-03-02  8:46 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-30 10:39 [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred Bharat Kumar Gogada
2016-08-30 10:39 ` Bharat Kumar Gogada
2016-08-30 10:39 ` [PATCH 2/3] PCI: Xilinx NWL PCIe: Enabling all MSI interrupts using MSI mask Bharat Kumar Gogada
2016-08-30 10:39   ` Bharat Kumar Gogada
2016-08-30 10:39   ` Bharat Kumar Gogada
2016-08-30 10:39 ` [PATCH 3/3] PCI: Xilinx NWL PCIe: Fix Error for multi function device for legacy interrupts Bharat Kumar Gogada
2016-08-30 10:39   ` Bharat Kumar Gogada
2016-08-30 12:17   ` Marc Zyngier
2016-08-30 12:17     ` Marc Zyngier
2016-08-30 14:13     ` Bharat Kumar Gogada
2016-08-30 14:13       ` Bharat Kumar Gogada
2016-08-30 14:13       ` Bharat Kumar Gogada
2016-08-30 15:02       ` Marc Zyngier
2016-08-30 15:02         ` Marc Zyngier
2016-08-30 15:02         ` Marc Zyngier
2016-08-31  9:56         ` Bharat Kumar Gogada
2016-08-31  9:56           ` Bharat Kumar Gogada
2016-08-31  9:56           ` Bharat Kumar Gogada
2016-08-31 10:56           ` Marc Zyngier
2016-08-31 10:56             ` Marc Zyngier
2016-08-31 10:56             ` Marc Zyngier
2016-09-01  5:19             ` Bharat Kumar Gogada
2016-09-01  5:19               ` Bharat Kumar Gogada
2016-09-01  5:19               ` Bharat Kumar Gogada
2016-09-12 22:02               ` Bjorn Helgaas
2016-09-12 22:02                 ` Bjorn Helgaas
2016-09-12 22:02                 ` Bjorn Helgaas
2016-09-13  7:41                 ` Marc Zyngier
2016-09-13  7:41                   ` Marc Zyngier
2016-09-13  7:41                   ` Marc Zyngier
2016-09-13 15:05                   ` Bjorn Helgaas
2016-09-13 15:05                     ` Bjorn Helgaas
2016-09-13 15:05                     ` Bjorn Helgaas
2016-09-13 15:34                     ` Bjorn Helgaas
2016-09-13 15:34                       ` Bjorn Helgaas
2016-09-13 15:34                       ` Bjorn Helgaas
2016-09-13 15:50                       ` Thomas Petazzoni
2016-09-13 15:50                         ` Thomas Petazzoni
2016-09-13 15:50                         ` Thomas Petazzoni
2016-09-14  5:34                       ` Bharat Kumar Gogada
2016-09-14  5:34                         ` Bharat Kumar Gogada
2016-09-14  5:34                         ` Bharat Kumar Gogada
2016-09-14  9:55                       ` Kishon Vijay Abraham I
2016-09-14  9:55                         ` Kishon Vijay Abraham I
2016-09-14  9:55                         ` Kishon Vijay Abraham I
2017-03-02  8:46                     ` Bharat Kumar Gogada
2017-03-02  8:46                       ` Bharat Kumar Gogada
2017-03-02  8:46                       ` Bharat Kumar Gogada
2016-09-13 14:32 ` [PATCH 1/3] PCI: Xilinx NWL PCIe: Expanding PCIe core errors and printing event occurred Bjorn Helgaas
2016-09-13 14:32   ` Bjorn Helgaas
2016-09-14  5:26   ` Bharat Kumar Gogada
2016-09-14  5:26     ` Bharat Kumar Gogada
2016-09-14  5:26     ` Bharat Kumar Gogada
2016-09-13 15:18 ` Bjorn Helgaas
2016-09-13 15:18   ` Bjorn Helgaas
2016-09-14  5:28   ` Bharat Kumar Gogada
2016-09-14  5:28     ` Bharat Kumar Gogada
2016-09-14  5:28     ` Bharat Kumar Gogada

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.