linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/2] PCI: iproc: SOC specific fixes
@ 2017-06-11  4:05 Oza Pawandeep
  2017-06-11  4:05 ` [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP Oza Pawandeep
  2017-06-11  4:05 ` [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC Oza Pawandeep
  0 siblings, 2 replies; 12+ messages in thread
From: Oza Pawandeep @ 2017-06-11  4:05 UTC (permalink / raw)
  To: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	bcm-kernel-feedback-list, Oza Pawandeep, Andy Gospodarek,
	linux-pci, linux-kernel, Oza Pawandeep

PCI: iproc: Retry request when CRS returned from EP

Above patch adds support for CRS in PCI RC driver,
otherwise if not handled at lower level, the user
space PMD (poll mode drivers) can timeout.

PCI: iproc: add device shutdown for PCI RC

This fixes the issue where certian PCI endpoints are
not getting detected on Stingray SOC after reboot.

Changes Since v3:
[re-send] 
Changes Since v2:
Fix compilation errors for pcie-iproc-platform.ko
which was caught by kbuild.

Oza Pawandeep (2):
  PCI: iproc: Retry request when CRS returned from EP
  PCI: iproc: add device shutdown for PCI RC

 drivers/pci/host/pcie-iproc-platform.c |   8 +++
 drivers/pci/host/pcie-iproc.c          | 126 ++++++++++++++++++++++++++++-----
 drivers/pci/host/pcie-iproc.h          |   1 +
 3 files changed, 116 insertions(+), 19 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP
  2017-06-11  4:05 [PATCH v3 0/2] PCI: iproc: SOC specific fixes Oza Pawandeep
@ 2017-06-11  4:05 ` Oza Pawandeep
  2017-06-12 23:30   ` Bjorn Helgaas
  2017-06-11  4:05 ` [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC Oza Pawandeep
  1 sibling, 1 reply; 12+ messages in thread
From: Oza Pawandeep @ 2017-06-11  4:05 UTC (permalink / raw)
  To: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	bcm-kernel-feedback-list, Oza Pawandeep, Andy Gospodarek,
	linux-pci, linux-kernel, Oza Pawandeep

For Configuration Requests only, following reset
it is possible for a device to terminate the request
but indicate that it is temporarily unable to process
the Request, but will be able to process the Request
in the future – in this case, the Configuration Request
Retry Status 10 (CRS) Completion Status is used

SPDK user space NVMe driver reinitializes NVMe which
causes reset, while doing this some configuration requests
get NAKed by Endpoint (NVMe).
Current iproc PCI driver is agnostic about it.
PAXB will forward the NAKed response in stipulated AXI code.

NVMe spec defines this timeout in 500 ms units, and this
only happens if controller has been in reset, or with new firmware,
or in abrupt shutdown case.
Meanwhile config access could result into retry.

This patch fixes the problem, and attempts to read again in case
of PAXB forwarding the NAK.

It implements iproc_pcie_config_read which gets called for Stingray.
Otherwise it falls back to PCI generic APIs.

Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>

diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
index 0f39bd2..05a3647 100644
--- a/drivers/pci/host/pcie-iproc.c
+++ b/drivers/pci/host/pcie-iproc.c
@@ -68,6 +68,9 @@
 #define APB_ERR_EN_SHIFT             0
 #define APB_ERR_EN                   BIT(APB_ERR_EN_SHIFT)
 
+#define CFG_RETRY_STATUS             0xffff0001
+#define CFG_RETRY_STATUS_TIMEOUT_US  500000 /* 500 milli-seconds. */
+
 /* derive the enum index of the outbound/inbound mapping registers */
 #define MAP_REG(base_reg, index)      ((base_reg) + (index) * 2)
 
@@ -448,6 +451,47 @@ static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus,
 	}
 }
 
+static int iproc_pcie_cfg_retry(void __iomem *cfg_data_p)
+{
+	int timeout = CFG_RETRY_STATUS_TIMEOUT_US;
+	unsigned int ret;
+
+	do {
+		ret = readl(cfg_data_p);
+		if (ret == CFG_RETRY_STATUS)
+			udelay(1);
+		else
+			return PCIBIOS_SUCCESSFUL;
+	} while (timeout--);
+
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie,
+					       unsigned int busno,
+					       unsigned int slot,
+					       unsigned int fn,
+					       int where)
+{
+	u16 offset;
+	u32 val;
+
+	/* EP device access */
+	val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
+		(slot << CFG_ADDR_DEV_NUM_SHIFT) |
+		(fn << CFG_ADDR_FUNC_NUM_SHIFT) |
+		(where & CFG_ADDR_REG_NUM_MASK) |
+		(1 & CFG_ADDR_CFG_TYPE_MASK);
+
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
+	offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
+
+	if (iproc_pcie_reg_is_invalid(offset))
+		return NULL;
+
+	return (pcie->base + offset);
+}
+
 /**
  * Note access to the configuration registers are protected at the higher layer
  * by 'pci_lock' in drivers/pci/access.c
@@ -499,13 +543,48 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
 		return (pcie->base + offset);
 }
 
+static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 *val)
+{
+	struct iproc_pcie *pcie = iproc_data(bus);
+	unsigned int slot = PCI_SLOT(devfn);
+	unsigned int fn = PCI_FUNC(devfn);
+	unsigned int busno = bus->number;
+	void __iomem *cfg_data_p;
+	int ret;
+
+	/* root complex access. */
+	if (busno == 0)
+		return pci_generic_config_read32(bus, devfn, where, size, val);
+
+	cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
+
+	if (!cfg_data_p)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = iproc_pcie_cfg_retry(cfg_data_p);
+	if (ret)
+		return ret;
+
+	*val = readl(cfg_data_p);
+
+	if (size <= 2)
+		*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
 static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 				    int where, int size, u32 *val)
 {
 	int ret;
+	struct iproc_pcie *pcie = iproc_data(bus);
 
 	iproc_pcie_apb_err_disable(bus, true);
-	ret = pci_generic_config_read32(bus, devfn, where, size, val);
+	if (pcie->type == IPROC_PCIE_PAXB_V2)
+		ret = iproc_pcie_config_read(bus, devfn, where, size, val);
+	else
+		ret = pci_generic_config_read32(bus, devfn, where, size, val);
 	iproc_pcie_apb_err_disable(bus, false);
 
 	return ret;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC
  2017-06-11  4:05 [PATCH v3 0/2] PCI: iproc: SOC specific fixes Oza Pawandeep
  2017-06-11  4:05 ` [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP Oza Pawandeep
@ 2017-06-11  4:05 ` Oza Pawandeep
  2017-06-12 23:43   ` Bjorn Helgaas
  1 sibling, 1 reply; 12+ messages in thread
From: Oza Pawandeep @ 2017-06-11  4:05 UTC (permalink / raw)
  To: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	bcm-kernel-feedback-list, Oza Pawandeep, Andy Gospodarek,
	linux-pci, linux-kernel, Oza Pawandeep

PERST# must be asserted around ~500ms before
the reboot is applied.

During soft reset (e.g., "reboot" from Linux) on some iProc based SoCs
LCPLL clock and PERST both goes off simultaneously.
This will cause certain Endpoints Intel NVMe not get detected, upon
next boot sequence.

This happens because; Endpoint is expecting the clock for some amount of
time after PERST is asserted, which is not happening in our case
(Compare to Intel X86 boards, will have clocks running).
this cause NVMe to behave in undefined way.

Essentially clock will remain alive for 500ms with PERST# = 0
before reboot.

This patch adds platform shutdown where it should be
called in device_shutdown while reboot command is issued.

So in sequence first Endpoint Shutdown (e.g. nvme_shutdown)
followed by RC shutdown is called, which issues safe PERST
assertion.

Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>

diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
index 90d2bdd..9512960 100644
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -131,6 +131,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
 	return iproc_pcie_remove(pcie);
 }
 
+static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
+{
+	struct iproc_pcie *pcie = platform_get_drvdata(pdev);
+
+	iproc_pcie_shutdown(pcie);
+}
+
 static struct platform_driver iproc_pcie_pltfm_driver = {
 	.driver = {
 		.name = "iproc-pcie",
@@ -138,6 +145,7 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
 	},
 	.probe = iproc_pcie_pltfm_probe,
 	.remove = iproc_pcie_pltfm_remove,
+	.shutdown = iproc_pcie_pltfm_shutdown,
 };
 module_platform_driver(iproc_pcie_pltfm_driver);
 
diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
index 05a3647..bb61376 100644
--- a/drivers/pci/host/pcie-iproc.c
+++ b/drivers/pci/host/pcie-iproc.c
@@ -608,32 +608,40 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn,
 	.write = iproc_pcie_config_write32,
 };
 
-static void iproc_pcie_reset(struct iproc_pcie *pcie)
+static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
 {
 	u32 val;
 
 	/*
-	 * PAXC and the internal emulated endpoint device downstream should not
-	 * be reset.  If firmware has been loaded on the endpoint device at an
-	 * earlier boot stage, reset here causes issues.
+	 * The internal emulated endpoints (such as PAXC) device downstream
+	 * should not be reset. If firmware has been loaded on the endpoint
+	 * device at an earlier boot stage, reset here causes issues.
 	 */
 	if (pcie->ep_is_internal)
 		return;
 
-	/*
-	 * Select perst_b signal as reset source. Put the device into reset,
-	 * and then bring it out of reset
-	 */
-	val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
-	val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
-		~RC_PCIE_RST_OUTPUT;
-	iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
-	udelay(250);
-
-	val |= RC_PCIE_RST_OUTPUT;
-	iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
-	msleep(100);
+	if (assert) {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+		val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
+			~RC_PCIE_RST_OUTPUT;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+		udelay(250);
+	} else {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+		val |= RC_PCIE_RST_OUTPUT;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+		msleep(100);
+	}
+}
+
+int iproc_pcie_shutdown(struct iproc_pcie *pcie)
+{
+	iproc_pcie_perst_ctrl(pcie, true);
+	msleep(500);
+
+	return 0;
 }
+EXPORT_SYMBOL(iproc_pcie_shutdown);
 
 static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus)
 {
@@ -1310,7 +1318,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
 		goto err_exit_phy;
 	}
 
-	iproc_pcie_reset(pcie);
+	iproc_pcie_perst_ctrl(pcie, true);
+	iproc_pcie_perst_ctrl(pcie, false);
 
 	if (pcie->need_ob_cfg) {
 		ret = iproc_pcie_map_ranges(pcie, res);
diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
index 0bbe2ea..a6b55ce 100644
--- a/drivers/pci/host/pcie-iproc.h
+++ b/drivers/pci/host/pcie-iproc.h
@@ -110,6 +110,7 @@ struct iproc_pcie {
 
 int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
 int iproc_pcie_remove(struct iproc_pcie *pcie);
+int iproc_pcie_shutdown(struct iproc_pcie *pcie);
 
 #ifdef CONFIG_PCIE_IPROC_MSI
 int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP
  2017-06-11  4:05 ` [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP Oza Pawandeep
@ 2017-06-12 23:30   ` Bjorn Helgaas
  2017-06-13  4:28     ` Oza Oza
  0 siblings, 1 reply; 12+ messages in thread
From: Bjorn Helgaas @ 2017-06-12 23:30 UTC (permalink / raw)
  To: Oza Pawandeep
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	bcm-kernel-feedback-list, Andy Gospodarek, linux-pci,
	linux-kernel, Oza Pawandeep

Please wrap your changelogs to use 75 columns.  "git log" indents the
changelog by four spaces, so if your text is 75 wide, it will still
fit without wrapping.

On Sun, Jun 11, 2017 at 09:35:37AM +0530, Oza Pawandeep wrote:
> For Configuration Requests only, following reset
> it is possible for a device to terminate the request
> but indicate that it is temporarily unable to process
> the Request, but will be able to process the Request
> in the future – in this case, the Configuration Request
> Retry Status 10 (CRS) Completion Status is used

How does this relate to the CRS support we already have in the core,
e.g., pci_bus_read_dev_vendor_id()?  It looks like your root complex
already returns 0xffff0001 (CFG_RETRY_STATUS) in some cases.

Also, per spec (PCIe r3.1, sec 2.3.2), CRS Software Visibility only
affects config reads of the Vendor ID, but you call
iproc_pcie_cfg_retry() for all config offsets.

> SPDK user space NVMe driver reinitializes NVMe which
> causes reset, while doing this some configuration requests
> get NAKed by Endpoint (NVMe).

What's SPDK?  I don't know what "NAK" means in a PCIe context.  If you
can use the appropriate PCIe terminology, I think it will make more
sense to me.

> Current iproc PCI driver is agnostic about it.
> PAXB will forward the NAKed response in stipulated AXI code.

In general a native host bridge driver should not have to deal with
the CRS feature because it's supported in the PCI core.  So we need
some explanation about why iProc is special in this regard.

> NVMe spec defines this timeout in 500 ms units, and this
> only happens if controller has been in reset, or with new firmware,
> or in abrupt shutdown case.
> Meanwhile config access could result into retry.

I don't understand why NVMe is relevant here.  Is there something
special about NVMe and CRS?

> This patch fixes the problem, and attempts to read again in case
> of PAXB forwarding the NAK.
> 
> It implements iproc_pcie_config_read which gets called for Stingray.
> Otherwise it falls back to PCI generic APIs.
> 
> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
> 
> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
> index 0f39bd2..05a3647 100644
> --- a/drivers/pci/host/pcie-iproc.c
> +++ b/drivers/pci/host/pcie-iproc.c
> @@ -68,6 +68,9 @@
>  #define APB_ERR_EN_SHIFT             0
>  #define APB_ERR_EN                   BIT(APB_ERR_EN_SHIFT)
>  
> +#define CFG_RETRY_STATUS             0xffff0001
> +#define CFG_RETRY_STATUS_TIMEOUT_US  500000 /* 500 milli-seconds. */
> +
>  /* derive the enum index of the outbound/inbound mapping registers */
>  #define MAP_REG(base_reg, index)      ((base_reg) + (index) * 2)
>  
> @@ -448,6 +451,47 @@ static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus,
>  	}
>  }
>  
> +static int iproc_pcie_cfg_retry(void __iomem *cfg_data_p)
> +{
> +	int timeout = CFG_RETRY_STATUS_TIMEOUT_US;
> +	unsigned int ret;
> +
> +	do {
> +		ret = readl(cfg_data_p);
> +		if (ret == CFG_RETRY_STATUS)
> +			udelay(1);
> +		else
> +			return PCIBIOS_SUCCESSFUL;
> +	} while (timeout--);
> +
> +	return PCIBIOS_DEVICE_NOT_FOUND;
> +}
> +
> +static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie,
> +					       unsigned int busno,
> +					       unsigned int slot,
> +					       unsigned int fn,
> +					       int where)
> +{
> +	u16 offset;
> +	u32 val;
> +
> +	/* EP device access */
> +	val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
> +		(slot << CFG_ADDR_DEV_NUM_SHIFT) |
> +		(fn << CFG_ADDR_FUNC_NUM_SHIFT) |
> +		(where & CFG_ADDR_REG_NUM_MASK) |
> +		(1 & CFG_ADDR_CFG_TYPE_MASK);
> +
> +	iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
> +	offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
> +
> +	if (iproc_pcie_reg_is_invalid(offset))
> +		return NULL;
> +
> +	return (pcie->base + offset);
> +}
> +
>  /**
>   * Note access to the configuration registers are protected at the higher layer
>   * by 'pci_lock' in drivers/pci/access.c
> @@ -499,13 +543,48 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
>  		return (pcie->base + offset);
>  }
>  
> +static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
> +				    int where, int size, u32 *val)
> +{
> +	struct iproc_pcie *pcie = iproc_data(bus);
> +	unsigned int slot = PCI_SLOT(devfn);
> +	unsigned int fn = PCI_FUNC(devfn);
> +	unsigned int busno = bus->number;
> +	void __iomem *cfg_data_p;
> +	int ret;
> +
> +	/* root complex access. */
> +	if (busno == 0)
> +		return pci_generic_config_read32(bus, devfn, where, size, val);
> +
> +	cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
> +
> +	if (!cfg_data_p)
> +		return PCIBIOS_DEVICE_NOT_FOUND;
> +
> +	ret = iproc_pcie_cfg_retry(cfg_data_p);
> +	if (ret)
> +		return ret;
> +
> +	*val = readl(cfg_data_p);
> +
> +	if (size <= 2)
> +		*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
> +
> +	return PCIBIOS_SUCCESSFUL;
> +}
> +
>  static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>  				    int where, int size, u32 *val)
>  {
>  	int ret;
> +	struct iproc_pcie *pcie = iproc_data(bus);
>  
>  	iproc_pcie_apb_err_disable(bus, true);
> -	ret = pci_generic_config_read32(bus, devfn, where, size, val);
> +	if (pcie->type == IPROC_PCIE_PAXB_V2)
> +		ret = iproc_pcie_config_read(bus, devfn, where, size, val);
> +	else
> +		ret = pci_generic_config_read32(bus, devfn, where, size, val);
>  	iproc_pcie_apb_err_disable(bus, false);
>  
>  	return ret;
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC
  2017-06-11  4:05 ` [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC Oza Pawandeep
@ 2017-06-12 23:43   ` Bjorn Helgaas
  2017-06-14  4:54     ` Oza Oza
  0 siblings, 1 reply; 12+ messages in thread
From: Bjorn Helgaas @ 2017-06-12 23:43 UTC (permalink / raw)
  To: Oza Pawandeep
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	bcm-kernel-feedback-list, Andy Gospodarek, linux-pci,
	linux-kernel, Oza Pawandeep

On Sun, Jun 11, 2017 at 09:35:38AM +0530, Oza Pawandeep wrote:
> PERST# must be asserted around ~500ms before
> the reboot is applied.
> 
> During soft reset (e.g., "reboot" from Linux) on some iProc based SoCs
> LCPLL clock and PERST both goes off simultaneously.
> This will cause certain Endpoints Intel NVMe not get detected, upon
> next boot sequence.
> 
> This happens because; Endpoint is expecting the clock for some amount of
> time after PERST is asserted, which is not happening in our case
> (Compare to Intel X86 boards, will have clocks running).
> this cause NVMe to behave in undefined way.

This doesn't smell right.  The description makes this sound like a
band-aid that happens to "solve" a problem with one particular device.
It doesn't sound like this is making iProc adhere to something in the
PCIe spec.

Is there some PCIe spec timing requirement that tells you about this
500ms number?  Please include the reference if so.

> Essentially clock will remain alive for 500ms with PERST# = 0
> before reboot.

> This patch adds platform shutdown where it should be
> called in device_shutdown while reboot command is issued.
> 
> So in sequence first Endpoint Shutdown (e.g. nvme_shutdown)
> followed by RC shutdown is called, which issues safe PERST
> assertion.
> 
> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
> 
> diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
> index 90d2bdd..9512960 100644
> --- a/drivers/pci/host/pcie-iproc-platform.c
> +++ b/drivers/pci/host/pcie-iproc-platform.c
> @@ -131,6 +131,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
>  	return iproc_pcie_remove(pcie);
>  }
>  
> +static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
> +{
> +	struct iproc_pcie *pcie = platform_get_drvdata(pdev);
> +
> +	iproc_pcie_shutdown(pcie);
> +}
> +
>  static struct platform_driver iproc_pcie_pltfm_driver = {
>  	.driver = {
>  		.name = "iproc-pcie",
> @@ -138,6 +145,7 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
>  	},
>  	.probe = iproc_pcie_pltfm_probe,
>  	.remove = iproc_pcie_pltfm_remove,
> +	.shutdown = iproc_pcie_pltfm_shutdown,
>  };
>  module_platform_driver(iproc_pcie_pltfm_driver);
>  
> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
> index 05a3647..bb61376 100644
> --- a/drivers/pci/host/pcie-iproc.c
> +++ b/drivers/pci/host/pcie-iproc.c
> @@ -608,32 +608,40 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn,
>  	.write = iproc_pcie_config_write32,
>  };
>  
> -static void iproc_pcie_reset(struct iproc_pcie *pcie)
> +static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
>  {
>  	u32 val;
>  
>  	/*
> -	 * PAXC and the internal emulated endpoint device downstream should not
> -	 * be reset.  If firmware has been loaded on the endpoint device at an
> -	 * earlier boot stage, reset here causes issues.
> +	 * The internal emulated endpoints (such as PAXC) device downstream
> +	 * should not be reset. If firmware has been loaded on the endpoint
> +	 * device at an earlier boot stage, reset here causes issues.
>  	 */
>  	if (pcie->ep_is_internal)
>  		return;
>  
> -	/*
> -	 * Select perst_b signal as reset source. Put the device into reset,
> -	 * and then bring it out of reset
> -	 */
> -	val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
> -	val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
> -		~RC_PCIE_RST_OUTPUT;
> -	iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> -	udelay(250);
> -
> -	val |= RC_PCIE_RST_OUTPUT;
> -	iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> -	msleep(100);
> +	if (assert) {
> +		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
> +		val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
> +			~RC_PCIE_RST_OUTPUT;
> +		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> +		udelay(250);
> +	} else {
> +		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
> +		val |= RC_PCIE_RST_OUTPUT;
> +		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> +		msleep(100);
> +	}
> +}
> +
> +int iproc_pcie_shutdown(struct iproc_pcie *pcie)
> +{
> +	iproc_pcie_perst_ctrl(pcie, true);
> +	msleep(500);
> +
> +	return 0;
>  }
> +EXPORT_SYMBOL(iproc_pcie_shutdown);
>  
>  static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus)
>  {
> @@ -1310,7 +1318,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
>  		goto err_exit_phy;
>  	}
>  
> -	iproc_pcie_reset(pcie);
> +	iproc_pcie_perst_ctrl(pcie, true);
> +	iproc_pcie_perst_ctrl(pcie, false);
>  
>  	if (pcie->need_ob_cfg) {
>  		ret = iproc_pcie_map_ranges(pcie, res);
> diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
> index 0bbe2ea..a6b55ce 100644
> --- a/drivers/pci/host/pcie-iproc.h
> +++ b/drivers/pci/host/pcie-iproc.h
> @@ -110,6 +110,7 @@ struct iproc_pcie {
>  
>  int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
>  int iproc_pcie_remove(struct iproc_pcie *pcie);
> +int iproc_pcie_shutdown(struct iproc_pcie *pcie);
>  
>  #ifdef CONFIG_PCIE_IPROC_MSI
>  int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP
  2017-06-12 23:30   ` Bjorn Helgaas
@ 2017-06-13  4:28     ` Oza Oza
  2017-06-13  5:40       ` Oza Oza
  2017-06-19 22:39       ` Bjorn Helgaas
  0 siblings, 2 replies; 12+ messages in thread
From: Oza Oza @ 2017-06-13  4:28 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Tue, Jun 13, 2017 at 5:00 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> Please wrap your changelogs to use 75 columns.  "git log" indents the
> changelog by four spaces, so if your text is 75 wide, it will still
> fit without wrapping.
>
> On Sun, Jun 11, 2017 at 09:35:37AM +0530, Oza Pawandeep wrote:
>> For Configuration Requests only, following reset
>> it is possible for a device to terminate the request
>> but indicate that it is temporarily unable to process
>> the Request, but will be able to process the Request
>> in the future – in this case, the Configuration Request
>> Retry Status 10 (CRS) Completion Status is used
>
> How does this relate to the CRS support we already have in the core,
> e.g., pci_bus_read_dev_vendor_id()?  It looks like your root complex
> already returns 0xffff0001 (CFG_RETRY_STATUS) in some cases.
>
> Also, per spec (PCIe r3.1, sec 2.3.2), CRS Software Visibility only
> affects config reads of the Vendor ID, but you call
> iproc_pcie_cfg_retry() for all config offsets.

Yes, as per Spec, CRS Software Visibility only affects config read of
the Vendor ID.
For config write or any other config read the Root must automatically
re-issue configuration
request again as a new request, and our PCIe RC fails to do so.

>
>> SPDK user space NVMe driver reinitializes NVMe which
>> causes reset, while doing this some configuration requests
>> get NAKed by Endpoint (NVMe).
>
> What's SPDK?  I don't know what "NAK" means in a PCIe context.  If you
> can use the appropriate PCIe terminology, I think it will make more
> sense to me.

when I meant NAK, I meant CRS, will change the description, and will take
care of using appropriate PCIe terminology.

>
>> Current iproc PCI driver is agnostic about it.
>> PAXB will forward the NAKed response in stipulated AXI code.
>
> In general a native host bridge driver should not have to deal with
> the CRS feature because it's supported in the PCI core.  So we need
> some explanation about why iProc is special in this regard.
>

For config write or any other config read the Root must automatically
re-issue configuration
request again as a new request, iproc based PCIe RC does not adhere to
this, and also
our PCI-to-AXI bridge (internal), which returns code 0xffff0001 to CPU.

>> NVMe spec defines this timeout in 500 ms units, and this
>> only happens if controller has been in reset, or with new firmware,
>> or in abrupt shutdown case.
>> Meanwhile config access could result into retry.
>
> I don't understand why NVMe is relevant here.  Is there something
> special about NVMe and CRS?
>

You are right, NVMe spec is irrelevant here, but since whole
exercise was carried out with NVMe and our major use cases are NVMe,
I ended up mentioning that. I can remove that from description.

>> This patch fixes the problem, and attempts to read again in case
>> of PAXB forwarding the NAK.
>>
>> It implements iproc_pcie_config_read which gets called for Stingray.
>> Otherwise it falls back to PCI generic APIs.
>>
>> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
>> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
>>
>> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
>> index 0f39bd2..05a3647 100644
>> --- a/drivers/pci/host/pcie-iproc.c
>> +++ b/drivers/pci/host/pcie-iproc.c
>> @@ -68,6 +68,9 @@
>>  #define APB_ERR_EN_SHIFT             0
>>  #define APB_ERR_EN                   BIT(APB_ERR_EN_SHIFT)
>>
>> +#define CFG_RETRY_STATUS             0xffff0001
>> +#define CFG_RETRY_STATUS_TIMEOUT_US  500000 /* 500 milli-seconds. */
>> +
>>  /* derive the enum index of the outbound/inbound mapping registers */
>>  #define MAP_REG(base_reg, index)      ((base_reg) + (index) * 2)
>>
>> @@ -448,6 +451,47 @@ static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus,
>>       }
>>  }
>>
>> +static int iproc_pcie_cfg_retry(void __iomem *cfg_data_p)
>> +{
>> +     int timeout = CFG_RETRY_STATUS_TIMEOUT_US;
>> +     unsigned int ret;
>> +
>> +     do {
>> +             ret = readl(cfg_data_p);
>> +             if (ret == CFG_RETRY_STATUS)
>> +                     udelay(1);
>> +             else
>> +                     return PCIBIOS_SUCCESSFUL;
>> +     } while (timeout--);
>> +
>> +     return PCIBIOS_DEVICE_NOT_FOUND;
>> +}
>> +
>> +static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie,
>> +                                            unsigned int busno,
>> +                                            unsigned int slot,
>> +                                            unsigned int fn,
>> +                                            int where)
>> +{
>> +     u16 offset;
>> +     u32 val;
>> +
>> +     /* EP device access */
>> +     val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
>> +             (slot << CFG_ADDR_DEV_NUM_SHIFT) |
>> +             (fn << CFG_ADDR_FUNC_NUM_SHIFT) |
>> +             (where & CFG_ADDR_REG_NUM_MASK) |
>> +             (1 & CFG_ADDR_CFG_TYPE_MASK);
>> +
>> +     iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
>> +     offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
>> +
>> +     if (iproc_pcie_reg_is_invalid(offset))
>> +             return NULL;
>> +
>> +     return (pcie->base + offset);
>> +}
>> +
>>  /**
>>   * Note access to the configuration registers are protected at the higher layer
>>   * by 'pci_lock' in drivers/pci/access.c
>> @@ -499,13 +543,48 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
>>               return (pcie->base + offset);
>>  }
>>
>> +static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>> +                                 int where, int size, u32 *val)
>> +{
>> +     struct iproc_pcie *pcie = iproc_data(bus);
>> +     unsigned int slot = PCI_SLOT(devfn);
>> +     unsigned int fn = PCI_FUNC(devfn);
>> +     unsigned int busno = bus->number;
>> +     void __iomem *cfg_data_p;
>> +     int ret;
>> +
>> +     /* root complex access. */
>> +     if (busno == 0)
>> +             return pci_generic_config_read32(bus, devfn, where, size, val);
>> +
>> +     cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
>> +
>> +     if (!cfg_data_p)
>> +             return PCIBIOS_DEVICE_NOT_FOUND;
>> +
>> +     ret = iproc_pcie_cfg_retry(cfg_data_p);
>> +     if (ret)
>> +             return ret;
>> +
>> +     *val = readl(cfg_data_p);
>> +
>> +     if (size <= 2)
>> +             *val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
>> +
>> +     return PCIBIOS_SUCCESSFUL;
>> +}
>> +
>>  static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>>                                   int where, int size, u32 *val)
>>  {
>>       int ret;
>> +     struct iproc_pcie *pcie = iproc_data(bus);
>>
>>       iproc_pcie_apb_err_disable(bus, true);
>> -     ret = pci_generic_config_read32(bus, devfn, where, size, val);
>> +     if (pcie->type == IPROC_PCIE_PAXB_V2)
>> +             ret = iproc_pcie_config_read(bus, devfn, where, size, val);
>> +     else
>> +             ret = pci_generic_config_read32(bus, devfn, where, size, val);
>>       iproc_pcie_apb_err_disable(bus, false);
>>
>>       return ret;
>> --
>> 1.9.1
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP
  2017-06-13  4:28     ` Oza Oza
@ 2017-06-13  5:40       ` Oza Oza
  2017-06-19 22:39       ` Bjorn Helgaas
  1 sibling, 0 replies; 12+ messages in thread
From: Oza Oza @ 2017-06-13  5:40 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Tue, Jun 13, 2017 at 9:58 AM, Oza Oza <oza.oza@broadcom.com> wrote:
> On Tue, Jun 13, 2017 at 5:00 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> Please wrap your changelogs to use 75 columns.  "git log" indents the
>> changelog by four spaces, so if your text is 75 wide, it will still
>> fit without wrapping.
>>
>> On Sun, Jun 11, 2017 at 09:35:37AM +0530, Oza Pawandeep wrote:
>>> For Configuration Requests only, following reset
>>> it is possible for a device to terminate the request
>>> but indicate that it is temporarily unable to process
>>> the Request, but will be able to process the Request
>>> in the future – in this case, the Configuration Request
>>> Retry Status 10 (CRS) Completion Status is used
>>
>> How does this relate to the CRS support we already have in the core,
>> e.g., pci_bus_read_dev_vendor_id()?  It looks like your root complex
>> already returns 0xffff0001 (CFG_RETRY_STATUS) in some cases.
>>
>> Also, per spec (PCIe r3.1, sec 2.3.2), CRS Software Visibility only
>> affects config reads of the Vendor ID, but you call
>> iproc_pcie_cfg_retry() for all config offsets.
>
> Yes, as per Spec, CRS Software Visibility only affects config read of
> the Vendor ID.
> For config write or any other config read the Root must automatically
> re-issue configuration
> request again as a new request, and our PCIe RC fails to do so.
>
>>
>>> SPDK user space NVMe driver reinitializes NVMe which
>>> causes reset, while doing this some configuration requests
>>> get NAKed by Endpoint (NVMe).
>>
>> What's SPDK?  I don't know what "NAK" means in a PCIe context.  If you
>> can use the appropriate PCIe terminology, I think it will make more
>> sense to me.

SPDK supports user space poll mode driver, and along with DPDK
interface with vfio
to directly map PCIe resources to user space.
the reason I mentioned SPDK, because it exposes this bug in our PCIe RC.

>
> when I meant NAK, I meant CRS, will change the description, and will take
> care of using appropriate PCIe terminology.
>
>>
>>> Current iproc PCI driver is agnostic about it.
>>> PAXB will forward the NAKed response in stipulated AXI code.
>>
>> In general a native host bridge driver should not have to deal with
>> the CRS feature because it's supported in the PCI core.  So we need
>> some explanation about why iProc is special in this regard.
>>
>
> For config write or any other config read the Root must automatically
> re-issue configuration
> request again as a new request, iproc based PCIe RC does not adhere to
> this, and also
> our PCI-to-AXI bridge (internal), which returns code 0xffff0001 to CPU.
>
>>> NVMe spec defines this timeout in 500 ms units, and this
>>> only happens if controller has been in reset, or with new firmware,
>>> or in abrupt shutdown case.
>>> Meanwhile config access could result into retry.
>>
>> I don't understand why NVMe is relevant here.  Is there something
>> special about NVMe and CRS?
>>
>
> You are right, NVMe spec is irrelevant here, but since whole
> exercise was carried out with NVMe and our major use cases are NVMe,
> I ended up mentioning that. I can remove that from description.
>
>>> This patch fixes the problem, and attempts to read again in case
>>> of PAXB forwarding the NAK.
>>>
>>> It implements iproc_pcie_config_read which gets called for Stingray.
>>> Otherwise it falls back to PCI generic APIs.
>>>
>>> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
>>> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>>> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
>>>
>>> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
>>> index 0f39bd2..05a3647 100644
>>> --- a/drivers/pci/host/pcie-iproc.c
>>> +++ b/drivers/pci/host/pcie-iproc.c
>>> @@ -68,6 +68,9 @@
>>>  #define APB_ERR_EN_SHIFT             0
>>>  #define APB_ERR_EN                   BIT(APB_ERR_EN_SHIFT)
>>>
>>> +#define CFG_RETRY_STATUS             0xffff0001
>>> +#define CFG_RETRY_STATUS_TIMEOUT_US  500000 /* 500 milli-seconds. */
>>> +
>>>  /* derive the enum index of the outbound/inbound mapping registers */
>>>  #define MAP_REG(base_reg, index)      ((base_reg) + (index) * 2)
>>>
>>> @@ -448,6 +451,47 @@ static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus,
>>>       }
>>>  }
>>>
>>> +static int iproc_pcie_cfg_retry(void __iomem *cfg_data_p)
>>> +{
>>> +     int timeout = CFG_RETRY_STATUS_TIMEOUT_US;
>>> +     unsigned int ret;
>>> +
>>> +     do {
>>> +             ret = readl(cfg_data_p);
>>> +             if (ret == CFG_RETRY_STATUS)
>>> +                     udelay(1);
>>> +             else
>>> +                     return PCIBIOS_SUCCESSFUL;
>>> +     } while (timeout--);
>>> +
>>> +     return PCIBIOS_DEVICE_NOT_FOUND;
>>> +}
>>> +
>>> +static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie,
>>> +                                            unsigned int busno,
>>> +                                            unsigned int slot,
>>> +                                            unsigned int fn,
>>> +                                            int where)
>>> +{
>>> +     u16 offset;
>>> +     u32 val;
>>> +
>>> +     /* EP device access */
>>> +     val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
>>> +             (slot << CFG_ADDR_DEV_NUM_SHIFT) |
>>> +             (fn << CFG_ADDR_FUNC_NUM_SHIFT) |
>>> +             (where & CFG_ADDR_REG_NUM_MASK) |
>>> +             (1 & CFG_ADDR_CFG_TYPE_MASK);
>>> +
>>> +     iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
>>> +     offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
>>> +
>>> +     if (iproc_pcie_reg_is_invalid(offset))
>>> +             return NULL;
>>> +
>>> +     return (pcie->base + offset);
>>> +}
>>> +
>>>  /**
>>>   * Note access to the configuration registers are protected at the higher layer
>>>   * by 'pci_lock' in drivers/pci/access.c
>>> @@ -499,13 +543,48 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
>>>               return (pcie->base + offset);
>>>  }
>>>
>>> +static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>>> +                                 int where, int size, u32 *val)
>>> +{
>>> +     struct iproc_pcie *pcie = iproc_data(bus);
>>> +     unsigned int slot = PCI_SLOT(devfn);
>>> +     unsigned int fn = PCI_FUNC(devfn);
>>> +     unsigned int busno = bus->number;
>>> +     void __iomem *cfg_data_p;
>>> +     int ret;
>>> +
>>> +     /* root complex access. */
>>> +     if (busno == 0)
>>> +             return pci_generic_config_read32(bus, devfn, where, size, val);
>>> +
>>> +     cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
>>> +
>>> +     if (!cfg_data_p)
>>> +             return PCIBIOS_DEVICE_NOT_FOUND;
>>> +
>>> +     ret = iproc_pcie_cfg_retry(cfg_data_p);
>>> +     if (ret)
>>> +             return ret;
>>> +
>>> +     *val = readl(cfg_data_p);
>>> +
>>> +     if (size <= 2)
>>> +             *val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
>>> +
>>> +     return PCIBIOS_SUCCESSFUL;
>>> +}
>>> +
>>>  static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>>>                                   int where, int size, u32 *val)
>>>  {
>>>       int ret;
>>> +     struct iproc_pcie *pcie = iproc_data(bus);
>>>
>>>       iproc_pcie_apb_err_disable(bus, true);
>>> -     ret = pci_generic_config_read32(bus, devfn, where, size, val);
>>> +     if (pcie->type == IPROC_PCIE_PAXB_V2)
>>> +             ret = iproc_pcie_config_read(bus, devfn, where, size, val);
>>> +     else
>>> +             ret = pci_generic_config_read32(bus, devfn, where, size, val);
>>>       iproc_pcie_apb_err_disable(bus, false);
>>>
>>>       return ret;
>>> --
>>> 1.9.1
>>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC
  2017-06-12 23:43   ` Bjorn Helgaas
@ 2017-06-14  4:54     ` Oza Oza
  2017-06-15 13:41       ` Bjorn Helgaas
  0 siblings, 1 reply; 12+ messages in thread
From: Oza Oza @ 2017-06-14  4:54 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Tue, Jun 13, 2017 at 5:13 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Sun, Jun 11, 2017 at 09:35:38AM +0530, Oza Pawandeep wrote:
>> PERST# must be asserted around ~500ms before
>> the reboot is applied.
>>
>> During soft reset (e.g., "reboot" from Linux) on some iProc based SoCs
>> LCPLL clock and PERST both goes off simultaneously.
>> This will cause certain Endpoints Intel NVMe not get detected, upon
>> next boot sequence.
>>
>> This happens because; Endpoint is expecting the clock for some amount of
>> time after PERST is asserted, which is not happening in our case
>> (Compare to Intel X86 boards, will have clocks running).
>> this cause NVMe to behave in undefined way.
>
> This doesn't smell right.  The description makes this sound like a
> band-aid that happens to "solve" a problem with one particular device.
> It doesn't sound like this is making iProc adhere to something in the
> PCIe spec.
>
> Is there some PCIe spec timing requirement that tells you about this
> 500ms number?  Please include the reference if so.

On chip PLL which sources the reference clock to the device gets reset
when soft reset is applied; the soft reset also asserts PERST#.
This simultaneous assertion of reset and clock being lost is a problem
with some NVME cards.
The delay is added to keep clock alive while PERST gets asserted.
The time delay number can be set to a number that will allow the NVME
device to go into reset state.
500 ms delay is picked for that reason only, which is long enough to
get EP into reset correctly.

>
>> Essentially clock will remain alive for 500ms with PERST# = 0
>> before reboot.
>
>> This patch adds platform shutdown where it should be
>> called in device_shutdown while reboot command is issued.
>>
>> So in sequence first Endpoint Shutdown (e.g. nvme_shutdown)
>> followed by RC shutdown is called, which issues safe PERST
>> assertion.
>>
>> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
>> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
>>
>> diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
>> index 90d2bdd..9512960 100644
>> --- a/drivers/pci/host/pcie-iproc-platform.c
>> +++ b/drivers/pci/host/pcie-iproc-platform.c
>> @@ -131,6 +131,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
>>       return iproc_pcie_remove(pcie);
>>  }
>>
>> +static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
>> +{
>> +     struct iproc_pcie *pcie = platform_get_drvdata(pdev);
>> +
>> +     iproc_pcie_shutdown(pcie);
>> +}
>> +
>>  static struct platform_driver iproc_pcie_pltfm_driver = {
>>       .driver = {
>>               .name = "iproc-pcie",
>> @@ -138,6 +145,7 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
>>       },
>>       .probe = iproc_pcie_pltfm_probe,
>>       .remove = iproc_pcie_pltfm_remove,
>> +     .shutdown = iproc_pcie_pltfm_shutdown,
>>  };
>>  module_platform_driver(iproc_pcie_pltfm_driver);
>>
>> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
>> index 05a3647..bb61376 100644
>> --- a/drivers/pci/host/pcie-iproc.c
>> +++ b/drivers/pci/host/pcie-iproc.c
>> @@ -608,32 +608,40 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn,
>>       .write = iproc_pcie_config_write32,
>>  };
>>
>> -static void iproc_pcie_reset(struct iproc_pcie *pcie)
>> +static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
>>  {
>>       u32 val;
>>
>>       /*
>> -      * PAXC and the internal emulated endpoint device downstream should not
>> -      * be reset.  If firmware has been loaded on the endpoint device at an
>> -      * earlier boot stage, reset here causes issues.
>> +      * The internal emulated endpoints (such as PAXC) device downstream
>> +      * should not be reset. If firmware has been loaded on the endpoint
>> +      * device at an earlier boot stage, reset here causes issues.
>>        */
>>       if (pcie->ep_is_internal)
>>               return;
>>
>> -     /*
>> -      * Select perst_b signal as reset source. Put the device into reset,
>> -      * and then bring it out of reset
>> -      */
>> -     val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
>> -     val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
>> -             ~RC_PCIE_RST_OUTPUT;
>> -     iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> -     udelay(250);
>> -
>> -     val |= RC_PCIE_RST_OUTPUT;
>> -     iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> -     msleep(100);
>> +     if (assert) {
>> +             val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
>> +             val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
>> +                     ~RC_PCIE_RST_OUTPUT;
>> +             iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> +             udelay(250);
>> +     } else {
>> +             val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
>> +             val |= RC_PCIE_RST_OUTPUT;
>> +             iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> +             msleep(100);
>> +     }
>> +}
>> +
>> +int iproc_pcie_shutdown(struct iproc_pcie *pcie)
>> +{
>> +     iproc_pcie_perst_ctrl(pcie, true);
>> +     msleep(500);
>> +
>> +     return 0;
>>  }
>> +EXPORT_SYMBOL(iproc_pcie_shutdown);
>>
>>  static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus)
>>  {
>> @@ -1310,7 +1318,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
>>               goto err_exit_phy;
>>       }
>>
>> -     iproc_pcie_reset(pcie);
>> +     iproc_pcie_perst_ctrl(pcie, true);
>> +     iproc_pcie_perst_ctrl(pcie, false);
>>
>>       if (pcie->need_ob_cfg) {
>>               ret = iproc_pcie_map_ranges(pcie, res);
>> diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
>> index 0bbe2ea..a6b55ce 100644
>> --- a/drivers/pci/host/pcie-iproc.h
>> +++ b/drivers/pci/host/pcie-iproc.h
>> @@ -110,6 +110,7 @@ struct iproc_pcie {
>>
>>  int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
>>  int iproc_pcie_remove(struct iproc_pcie *pcie);
>> +int iproc_pcie_shutdown(struct iproc_pcie *pcie);
>>
>>  #ifdef CONFIG_PCIE_IPROC_MSI
>>  int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
>> --
>> 1.9.1
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC
  2017-06-14  4:54     ` Oza Oza
@ 2017-06-15 13:41       ` Bjorn Helgaas
  2017-06-21  8:04         ` Oza Oza
  0 siblings, 1 reply; 12+ messages in thread
From: Bjorn Helgaas @ 2017-06-15 13:41 UTC (permalink / raw)
  To: Oza Oza
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Wed, Jun 14, 2017 at 10:24:11AM +0530, Oza Oza wrote:
> On Tue, Jun 13, 2017 at 5:13 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Sun, Jun 11, 2017 at 09:35:38AM +0530, Oza Pawandeep wrote:
> >> PERST# must be asserted around ~500ms before
> >> the reboot is applied.
> >>
> >> During soft reset (e.g., "reboot" from Linux) on some iProc based SoCs
> >> LCPLL clock and PERST both goes off simultaneously.
> >> This will cause certain Endpoints Intel NVMe not get detected, upon
> >> next boot sequence.
> >>
> >> This happens because; Endpoint is expecting the clock for some amount of
> >> time after PERST is asserted, which is not happening in our case
> >> (Compare to Intel X86 boards, will have clocks running).
> >> this cause NVMe to behave in undefined way.
> >
> > This doesn't smell right.  The description makes this sound like a
> > band-aid that happens to "solve" a problem with one particular device.
> > It doesn't sound like this is making iProc adhere to something in the
> > PCIe spec.
> >
> > Is there some PCIe spec timing requirement that tells you about this
> > 500ms number?  Please include the reference if so.
> 
> On chip PLL which sources the reference clock to the device gets reset
> when soft reset is applied; the soft reset also asserts PERST#.
> This simultaneous assertion of reset and clock being lost is a problem
> with some NVME cards.
> The delay is added to keep clock alive while PERST gets asserted.
> The time delay number can be set to a number that will allow the NVME
> device to go into reset state.
> 500 ms delay is picked for that reason only, which is long enough to
> get EP into reset correctly.

This doesn't really tell me anything new.

We're talking about the protocol on the link between the RC and the
endpoint.  That *should* be completely specified by the PCIe spec.  If
there's some requirement that the clock keep running after PERST is
asserted, that should be in the spec.

If it's not in the spec, and an endpoint relies on it, the endpoint is
non-compliant, and we'll likely see similar issues with that endpoint
on other platforms.

If we need a workaround for a specific endpoint, that might be OK; the
world isn't perfect.  But if that's the case, we should include more
specifics about the device, e.g., vendor/device IDs and "lspci -vv"
output.

> >> Essentially clock will remain alive for 500ms with PERST# = 0
> >> before reboot.
> >
> >> This patch adds platform shutdown where it should be
> >> called in device_shutdown while reboot command is issued.
> >>
> >> So in sequence first Endpoint Shutdown (e.g. nvme_shutdown)
> >> followed by RC shutdown is called, which issues safe PERST
> >> assertion.
> >>
> >> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
> >> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
> >> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
> >>
> >> diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
> >> index 90d2bdd..9512960 100644
> >> --- a/drivers/pci/host/pcie-iproc-platform.c
> >> +++ b/drivers/pci/host/pcie-iproc-platform.c
> >> @@ -131,6 +131,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
> >>       return iproc_pcie_remove(pcie);
> >>  }
> >>
> >> +static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
> >> +{
> >> +     struct iproc_pcie *pcie = platform_get_drvdata(pdev);
> >> +
> >> +     iproc_pcie_shutdown(pcie);
> >> +}
> >> +
> >>  static struct platform_driver iproc_pcie_pltfm_driver = {
> >>       .driver = {
> >>               .name = "iproc-pcie",
> >> @@ -138,6 +145,7 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
> >>       },
> >>       .probe = iproc_pcie_pltfm_probe,
> >>       .remove = iproc_pcie_pltfm_remove,
> >> +     .shutdown = iproc_pcie_pltfm_shutdown,
> >>  };
> >>  module_platform_driver(iproc_pcie_pltfm_driver);
> >>
> >> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
> >> index 05a3647..bb61376 100644
> >> --- a/drivers/pci/host/pcie-iproc.c
> >> +++ b/drivers/pci/host/pcie-iproc.c
> >> @@ -608,32 +608,40 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn,
> >>       .write = iproc_pcie_config_write32,
> >>  };
> >>
> >> -static void iproc_pcie_reset(struct iproc_pcie *pcie)
> >> +static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
> >>  {
> >>       u32 val;
> >>
> >>       /*
> >> -      * PAXC and the internal emulated endpoint device downstream should not
> >> -      * be reset.  If firmware has been loaded on the endpoint device at an
> >> -      * earlier boot stage, reset here causes issues.
> >> +      * The internal emulated endpoints (such as PAXC) device downstream
> >> +      * should not be reset. If firmware has been loaded on the endpoint
> >> +      * device at an earlier boot stage, reset here causes issues.
> >>        */
> >>       if (pcie->ep_is_internal)
> >>               return;
> >>
> >> -     /*
> >> -      * Select perst_b signal as reset source. Put the device into reset,
> >> -      * and then bring it out of reset
> >> -      */
> >> -     val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
> >> -     val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
> >> -             ~RC_PCIE_RST_OUTPUT;
> >> -     iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> >> -     udelay(250);
> >> -
> >> -     val |= RC_PCIE_RST_OUTPUT;
> >> -     iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> >> -     msleep(100);
> >> +     if (assert) {
> >> +             val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
> >> +             val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
> >> +                     ~RC_PCIE_RST_OUTPUT;
> >> +             iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> >> +             udelay(250);
> >> +     } else {
> >> +             val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
> >> +             val |= RC_PCIE_RST_OUTPUT;
> >> +             iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
> >> +             msleep(100);
> >> +     }
> >> +}
> >> +
> >> +int iproc_pcie_shutdown(struct iproc_pcie *pcie)
> >> +{
> >> +     iproc_pcie_perst_ctrl(pcie, true);
> >> +     msleep(500);
> >> +
> >> +     return 0;
> >>  }
> >> +EXPORT_SYMBOL(iproc_pcie_shutdown);
> >>
> >>  static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus)
> >>  {
> >> @@ -1310,7 +1318,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
> >>               goto err_exit_phy;
> >>       }
> >>
> >> -     iproc_pcie_reset(pcie);
> >> +     iproc_pcie_perst_ctrl(pcie, true);
> >> +     iproc_pcie_perst_ctrl(pcie, false);
> >>
> >>       if (pcie->need_ob_cfg) {
> >>               ret = iproc_pcie_map_ranges(pcie, res);
> >> diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
> >> index 0bbe2ea..a6b55ce 100644
> >> --- a/drivers/pci/host/pcie-iproc.h
> >> +++ b/drivers/pci/host/pcie-iproc.h
> >> @@ -110,6 +110,7 @@ struct iproc_pcie {
> >>
> >>  int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
> >>  int iproc_pcie_remove(struct iproc_pcie *pcie);
> >> +int iproc_pcie_shutdown(struct iproc_pcie *pcie);
> >>
> >>  #ifdef CONFIG_PCIE_IPROC_MSI
> >>  int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
> >> --
> >> 1.9.1
> >>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP
  2017-06-13  4:28     ` Oza Oza
  2017-06-13  5:40       ` Oza Oza
@ 2017-06-19 22:39       ` Bjorn Helgaas
  2017-06-20 12:44         ` Oza Oza
  1 sibling, 1 reply; 12+ messages in thread
From: Bjorn Helgaas @ 2017-06-19 22:39 UTC (permalink / raw)
  To: Oza Oza
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Tue, Jun 13, 2017 at 09:58:22AM +0530, Oza Oza wrote:
> On Tue, Jun 13, 2017 at 5:00 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > Please wrap your changelogs to use 75 columns.  "git log" indents the
> > changelog by four spaces, so if your text is 75 wide, it will still
> > fit without wrapping.
> >
> > On Sun, Jun 11, 2017 at 09:35:37AM +0530, Oza Pawandeep wrote:
> >> For Configuration Requests only, following reset
> >> it is possible for a device to terminate the request
> >> but indicate that it is temporarily unable to process
> >> the Request, but will be able to process the Request
> >> in the future – in this case, the Configuration Request
> >> Retry Status 10 (CRS) Completion Status is used
> >
> > How does this relate to the CRS support we already have in the core,
> > e.g., pci_bus_read_dev_vendor_id()?  It looks like your root complex
> > already returns 0xffff0001 (CFG_RETRY_STATUS) in some cases.
> >
> > Also, per spec (PCIe r3.1, sec 2.3.2), CRS Software Visibility only
> > affects config reads of the Vendor ID, but you call
> > iproc_pcie_cfg_retry() for all config offsets.
> 
> Yes, as per Spec, CRS Software Visibility only affects config read of
> the Vendor ID.
> For config write or any other config read the Root must automatically
> re-issue configuration
> request again as a new request, and our PCIe RC fails to do so.

OK, if this is a workaround for a hardware defect, let's make that
explicit in the changelog (and probably a comment in the code, too).

I'm actually not sure the spec *requires* the CRS retries to be done
directly in hardware, so it's conceivable the hardware could be
working as designed.  But a comment would go a long way toward making
this understandable by differentiating it from the generic CRS
handling in the core.

Bjorn

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP
  2017-06-19 22:39       ` Bjorn Helgaas
@ 2017-06-20 12:44         ` Oza Oza
  0 siblings, 0 replies; 12+ messages in thread
From: Oza Oza @ 2017-06-20 12:44 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Tue, Jun 20, 2017 at 4:09 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Tue, Jun 13, 2017 at 09:58:22AM +0530, Oza Oza wrote:
>> On Tue, Jun 13, 2017 at 5:00 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> > Please wrap your changelogs to use 75 columns.  "git log" indents the
>> > changelog by four spaces, so if your text is 75 wide, it will still
>> > fit without wrapping.
>> >
>> > On Sun, Jun 11, 2017 at 09:35:37AM +0530, Oza Pawandeep wrote:
>> >> For Configuration Requests only, following reset
>> >> it is possible for a device to terminate the request
>> >> but indicate that it is temporarily unable to process
>> >> the Request, but will be able to process the Request
>> >> in the future – in this case, the Configuration Request
>> >> Retry Status 10 (CRS) Completion Status is used
>> >
>> > How does this relate to the CRS support we already have in the core,
>> > e.g., pci_bus_read_dev_vendor_id()?  It looks like your root complex
>> > already returns 0xffff0001 (CFG_RETRY_STATUS) in some cases.
>> >
>> > Also, per spec (PCIe r3.1, sec 2.3.2), CRS Software Visibility only
>> > affects config reads of the Vendor ID, but you call
>> > iproc_pcie_cfg_retry() for all config offsets.
>>
>> Yes, as per Spec, CRS Software Visibility only affects config read of
>> the Vendor ID.
>> For config write or any other config read the Root must automatically
>> re-issue configuration
>> request again as a new request, and our PCIe RC fails to do so.
>
> OK, if this is a workaround for a hardware defect, let's make that
> explicit in the changelog (and probably a comment in the code, too).
>
> I'm actually not sure the spec *requires* the CRS retries to be done
> directly in hardware, so it's conceivable the hardware could be
> working as designed.  But a comment would go a long way toward making
> this understandable by differentiating it from the generic CRS
> handling in the core.
>
> Bjorn

Sure, I will update the commit message and also will add explicit
comment in the code.

Regards,
Oza.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC
  2017-06-15 13:41       ` Bjorn Helgaas
@ 2017-06-21  8:04         ` Oza Oza
  0 siblings, 0 replies; 12+ messages in thread
From: Oza Oza @ 2017-06-21  8:04 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Bjorn Helgaas, Ray Jui, Scott Branden, Jon Mason,
	BCM Kernel Feedback, Andy Gospodarek, linux-pci, linux-kernel,
	Oza Pawandeep

On Thu, Jun 15, 2017 at 7:11 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Wed, Jun 14, 2017 at 10:24:11AM +0530, Oza Oza wrote:
>> On Tue, Jun 13, 2017 at 5:13 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> > On Sun, Jun 11, 2017 at 09:35:38AM +0530, Oza Pawandeep wrote:
>> >> PERST# must be asserted around ~500ms before
>> >> the reboot is applied.
>> >>
>> >> During soft reset (e.g., "reboot" from Linux) on some iProc based SoCs
>> >> LCPLL clock and PERST both goes off simultaneously.
>> >> This will cause certain Endpoints Intel NVMe not get detected, upon
>> >> next boot sequence.
>> >>
>> >> This happens because; Endpoint is expecting the clock for some amount of
>> >> time after PERST is asserted, which is not happening in our case
>> >> (Compare to Intel X86 boards, will have clocks running).
>> >> this cause NVMe to behave in undefined way.
>> >
>> > This doesn't smell right.  The description makes this sound like a
>> > band-aid that happens to "solve" a problem with one particular device.
>> > It doesn't sound like this is making iProc adhere to something in the
>> > PCIe spec.
>> >
>> > Is there some PCIe spec timing requirement that tells you about this
>> > 500ms number?  Please include the reference if so.
>>
>> On chip PLL which sources the reference clock to the device gets reset
>> when soft reset is applied; the soft reset also asserts PERST#.
>> This simultaneous assertion of reset and clock being lost is a problem
>> with some NVME cards.
>> The delay is added to keep clock alive while PERST gets asserted.
>> The time delay number can be set to a number that will allow the NVME
>> device to go into reset state.
>> 500 ms delay is picked for that reason only, which is long enough to
>> get EP into reset correctly.
>
> This doesn't really tell me anything new.
>
> We're talking about the protocol on the link between the RC and the
> endpoint.  That *should* be completely specified by the PCIe spec.  If
> there's some requirement that the clock keep running after PERST is
> asserted, that should be in the spec.
>
> If it's not in the spec, and an endpoint relies on it, the endpoint is
> non-compliant, and we'll likely see similar issues with that endpoint
> on other platforms.
>
> If we need a workaround for a specific endpoint, that might be OK; the
> world isn't perfect.  But if that's the case, we should include more
> specifics about the device, e.g., vendor/device IDs and "lspci -vv"
> output.
>

This is specifically happening with Intel P3700 400 gb series.
http://ark.intel.com/products/79625/Intel-SSD-DC-P3700-Series-400GB-2_5in-PCIe-3_0-20nm-MLC

but on Intel board you will not find this problem, because ref clock
is supplied by
on-board oscillator.

But our board(s) do not have clock coming from on-board,
rather it is derived from PLL coming from SOC.

Besides, PCI spec does not stipulate about such timings.
In-fact it does not tell us, whether PCIe device should consider
refclk to be available or not to be.
500 ms is just based on the observation and taken as safe margin.

So, this is partly because of the Endpoint behavior and partly due to the
way our boards are designed.

with the same SSD, we will not see this issue on x86 boards, or
even any other ARM based boards, who supplies there ref clock by
on-bard oscillator,
so in case of reboot, ref clock does not go off.

please also refer to the link below which stipulate on clk being
active after PERST being asserted.
http://www.eetimes.com/document.asp?doc_id=1279299

Product: Intel P3700 400 gb SSD
Vendor ID: 8086
Device ID: 0953.

I am sorry, but we have made replacement order of those disks, so I
dont have lspci -vv output.
but hope that above information is sufficient.

>> >> Essentially clock will remain alive for 500ms with PERST# = 0
>> >> before reboot.
>> >
>> >> This patch adds platform shutdown where it should be
>> >> called in device_shutdown while reboot command is issued.
>> >>
>> >> So in sequence first Endpoint Shutdown (e.g. nvme_shutdown)
>> >> followed by RC shutdown is called, which issues safe PERST
>> >> assertion.
>> >>
>> >> Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
>> >> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>> >> Reviewed-by: Scott Branden <scott.branden@broadcom.com>
>> >>
>> >> diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
>> >> index 90d2bdd..9512960 100644
>> >> --- a/drivers/pci/host/pcie-iproc-platform.c
>> >> +++ b/drivers/pci/host/pcie-iproc-platform.c
>> >> @@ -131,6 +131,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
>> >>       return iproc_pcie_remove(pcie);
>> >>  }
>> >>
>> >> +static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
>> >> +{
>> >> +     struct iproc_pcie *pcie = platform_get_drvdata(pdev);
>> >> +
>> >> +     iproc_pcie_shutdown(pcie);
>> >> +}
>> >> +
>> >>  static struct platform_driver iproc_pcie_pltfm_driver = {
>> >>       .driver = {
>> >>               .name = "iproc-pcie",
>> >> @@ -138,6 +145,7 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
>> >>       },
>> >>       .probe = iproc_pcie_pltfm_probe,
>> >>       .remove = iproc_pcie_pltfm_remove,
>> >> +     .shutdown = iproc_pcie_pltfm_shutdown,
>> >>  };
>> >>  module_platform_driver(iproc_pcie_pltfm_driver);
>> >>
>> >> diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
>> >> index 05a3647..bb61376 100644
>> >> --- a/drivers/pci/host/pcie-iproc.c
>> >> +++ b/drivers/pci/host/pcie-iproc.c
>> >> @@ -608,32 +608,40 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn,
>> >>       .write = iproc_pcie_config_write32,
>> >>  };
>> >>
>> >> -static void iproc_pcie_reset(struct iproc_pcie *pcie)
>> >> +static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
>> >>  {
>> >>       u32 val;
>> >>
>> >>       /*
>> >> -      * PAXC and the internal emulated endpoint device downstream should not
>> >> -      * be reset.  If firmware has been loaded on the endpoint device at an
>> >> -      * earlier boot stage, reset here causes issues.
>> >> +      * The internal emulated endpoints (such as PAXC) device downstream
>> >> +      * should not be reset. If firmware has been loaded on the endpoint
>> >> +      * device at an earlier boot stage, reset here causes issues.
>> >>        */
>> >>       if (pcie->ep_is_internal)
>> >>               return;
>> >>
>> >> -     /*
>> >> -      * Select perst_b signal as reset source. Put the device into reset,
>> >> -      * and then bring it out of reset
>> >> -      */
>> >> -     val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
>> >> -     val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
>> >> -             ~RC_PCIE_RST_OUTPUT;
>> >> -     iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> >> -     udelay(250);
>> >> -
>> >> -     val |= RC_PCIE_RST_OUTPUT;
>> >> -     iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> >> -     msleep(100);
>> >> +     if (assert) {
>> >> +             val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
>> >> +             val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
>> >> +                     ~RC_PCIE_RST_OUTPUT;
>> >> +             iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> >> +             udelay(250);
>> >> +     } else {
>> >> +             val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
>> >> +             val |= RC_PCIE_RST_OUTPUT;
>> >> +             iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
>> >> +             msleep(100);
>> >> +     }
>> >> +}
>> >> +
>> >> +int iproc_pcie_shutdown(struct iproc_pcie *pcie)
>> >> +{
>> >> +     iproc_pcie_perst_ctrl(pcie, true);
>> >> +     msleep(500);
>> >> +
>> >> +     return 0;
>> >>  }
>> >> +EXPORT_SYMBOL(iproc_pcie_shutdown);
>> >>
>> >>  static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus)
>> >>  {
>> >> @@ -1310,7 +1318,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
>> >>               goto err_exit_phy;
>> >>       }
>> >>
>> >> -     iproc_pcie_reset(pcie);
>> >> +     iproc_pcie_perst_ctrl(pcie, true);
>> >> +     iproc_pcie_perst_ctrl(pcie, false);
>> >>
>> >>       if (pcie->need_ob_cfg) {
>> >>               ret = iproc_pcie_map_ranges(pcie, res);
>> >> diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
>> >> index 0bbe2ea..a6b55ce 100644
>> >> --- a/drivers/pci/host/pcie-iproc.h
>> >> +++ b/drivers/pci/host/pcie-iproc.h
>> >> @@ -110,6 +110,7 @@ struct iproc_pcie {
>> >>
>> >>  int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
>> >>  int iproc_pcie_remove(struct iproc_pcie *pcie);
>> >> +int iproc_pcie_shutdown(struct iproc_pcie *pcie);
>> >>
>> >>  #ifdef CONFIG_PCIE_IPROC_MSI
>> >>  int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
>> >> --
>> >> 1.9.1
>> >>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-06-21  8:05 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-11  4:05 [PATCH v3 0/2] PCI: iproc: SOC specific fixes Oza Pawandeep
2017-06-11  4:05 ` [PATCH v3 1/2] PCI: iproc: Retry request when CRS returned from EP Oza Pawandeep
2017-06-12 23:30   ` Bjorn Helgaas
2017-06-13  4:28     ` Oza Oza
2017-06-13  5:40       ` Oza Oza
2017-06-19 22:39       ` Bjorn Helgaas
2017-06-20 12:44         ` Oza Oza
2017-06-11  4:05 ` [PATCH v3 2/2] PCI: iproc: add device shutdown for PCI RC Oza Pawandeep
2017-06-12 23:43   ` Bjorn Helgaas
2017-06-14  4:54     ` Oza Oza
2017-06-15 13:41       ` Bjorn Helgaas
2017-06-21  8:04         ` Oza Oza

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).