linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [v4] PCI: Avoid unsync of LTR mechanism configuration
@ 2021-02-04  9:51 mingchuang.qiao
  2021-02-04 10:00 ` Mika Westerberg
  2021-02-18 16:50 ` Bjorn Helgaas
  0 siblings, 2 replies; 10+ messages in thread
From: mingchuang.qiao @ 2021-02-04  9:51 UTC (permalink / raw)
  To: bhelgaas, matthias.bgg
  Cc: linux-pci, linux-kernel, linux-arm-kernel, linux-mediatek,
	mingchuang.qiao, haijun.liu, lambert.wang, kerun.zhu,
	mika.westerberg, alex.williamson, rjw, utkarsh.h.patel

From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>

In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2 register is
configured in pci_configure_ltr(). If device and bridge both support LTR
mechanism, the "LTR Mechanism Enable" bit of device and bridge will be
enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as 1.

If PCIe link goes down when device resets, the "LTR Mechanism Enable" bit
of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16. However,
the pci_dev->ltr_path value of bridge is still 1.

For following conditions, check and re-configure "LTR Mechanism Enable" bit
of bridge to make "LTR Mechanism Enable" bit match ltr_path value.
   -before configuring device's LTR for hot-remove/hot-add
   -before restoring device's DEVCTL2 register when restore device state

Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
---
changes of v4
 -fix typo of commit message
 -rename: pci_reconfigure_bridge_ltr()->pci_bridge_reconfigure_ltr()
changes of v3
 -call pci_reconfigure_bridge_ltr() in probe.c
changes of v2
 -modify patch description
 -reconfigure bridge's LTR before restoring device DEVCTL2 register
---
 drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
 drivers/pci/pci.h   |  1 +
 drivers/pci/probe.c | 13 ++++++++++---
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b9fecc25d213..6bf65d295331 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct pci_dev *dev)
 	return 0;
 }
 
+void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCIEASPM
+	struct pci_dev *bridge;
+	u32 ctl;
+
+	bridge = pci_upstream_bridge(dev);
+	if (bridge && bridge->ltr_path) {
+		pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl);
+		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
+			pci_dbg(bridge, "re-enabling LTR\n");
+			pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2,
+						 PCI_EXP_DEVCTL2_LTR_EN);
+		}
+	}
+#endif
+}
+
 static void pci_restore_pcie_state(struct pci_dev *dev)
 {
 	int i = 0;
@@ -1447,6 +1465,13 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
 	if (!save_state)
 		return;
 
+	/*
+	 * Downstream ports reset the LTR enable bit when link goes down.
+	 * Check and re-configure the bit here before restoring device.
+	 * PCIe r5.0, sec 7.5.3.16.
+	 */
+	pci_bridge_reconfigure_ltr(dev);
+
 	cap = (u16 *)&save_state->cap.data[0];
 	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, cap[i++]);
 	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 5c59365092fa..b3a5e5287cb7 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct pci_dev *dev);
 bool pci_bridge_d3_possible(struct pci_dev *dev);
 void pci_bridge_d3_update(struct pci_dev *dev);
 void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
+void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
 
 static inline void pci_wakeup_event(struct pci_dev *dev)
 {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 953f15abc850..ade055e9fb58 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct pci_dev *dev)
 	 * Complex and all intermediate Switches indicate support for LTR.
 	 * PCIe r4.0, sec 6.18.
 	 */
-	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
-	    ((bridge = pci_upstream_bridge(dev)) &&
-	      bridge->ltr_path)) {
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
+					 PCI_EXP_DEVCTL2_LTR_EN);
+		dev->ltr_path = 1;
+		return;
+	}
+
+	bridge = pci_upstream_bridge(dev);
+	if (bridge && bridge->ltr_path) {
+		pci_bridge_reconfigure_ltr(dev);
 		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
 					 PCI_EXP_DEVCTL2_LTR_EN);
 		dev->ltr_path = 1;
-- 
2.18.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-02-04  9:51 [v4] PCI: Avoid unsync of LTR mechanism configuration mingchuang.qiao
@ 2021-02-04 10:00 ` Mika Westerberg
  2021-02-18 16:50 ` Bjorn Helgaas
  1 sibling, 0 replies; 10+ messages in thread
From: Mika Westerberg @ 2021-02-04 10:00 UTC (permalink / raw)
  To: mingchuang.qiao
  Cc: bhelgaas, matthias.bgg, linux-pci, linux-kernel,
	linux-arm-kernel, linux-mediatek, haijun.liu, lambert.wang,
	kerun.zhu, alex.williamson, rjw, utkarsh.h.patel

On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@mediatek.com wrote:
> From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> 
> In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2 register is
> configured in pci_configure_ltr(). If device and bridge both support LTR
> mechanism, the "LTR Mechanism Enable" bit of device and bridge will be
> enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as 1.
> 
> If PCIe link goes down when device resets, the "LTR Mechanism Enable" bit
> of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16. However,
> the pci_dev->ltr_path value of bridge is still 1.
> 
> For following conditions, check and re-configure "LTR Mechanism Enable" bit
> of bridge to make "LTR Mechanism Enable" bit match ltr_path value.
>    -before configuring device's LTR for hot-remove/hot-add
>    -before restoring device's DEVCTL2 register when restore device state
> 
> Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>

Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-02-04  9:51 [v4] PCI: Avoid unsync of LTR mechanism configuration mingchuang.qiao
  2021-02-04 10:00 ` Mika Westerberg
@ 2021-02-18 16:50 ` Bjorn Helgaas
  2021-03-22  0:41   ` Mingchuang Qiao
  2021-09-06  5:36   ` mingchuang qiao
  1 sibling, 2 replies; 10+ messages in thread
From: Bjorn Helgaas @ 2021-02-18 16:50 UTC (permalink / raw)
  To: mingchuang.qiao
  Cc: bhelgaas, matthias.bgg, linux-pci, linux-kernel,
	linux-arm-kernel, linux-mediatek, haijun.liu, lambert.wang,
	kerun.zhu, mika.westerberg, alex.williamson, rjw,
	utkarsh.h.patel

On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@mediatek.com wrote:
> From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> 
> In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2 register is
> configured in pci_configure_ltr(). If device and bridge both support LTR
> mechanism, the "LTR Mechanism Enable" bit of device and bridge will be
> enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as 1.
> 
> If PCIe link goes down when device resets, the "LTR Mechanism Enable" bit
> of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16. However,
> the pci_dev->ltr_path value of bridge is still 1.
> 
> For following conditions, check and re-configure "LTR Mechanism Enable" bit
> of bridge to make "LTR Mechanism Enable" bit match ltr_path value.
>    -before configuring device's LTR for hot-remove/hot-add
>    -before restoring device's DEVCTL2 register when restore device state

There's definitely a bug here.  The commit log should say a little
more about what it is.  I *think* if LTR is enabled and we suspend
(putting the device in D3cold) and resume, LTR probably doesn't work
after resume because LTR is disabled in the upstream bridge, which
would be an obvious bug.

Also, if a device with LTR enabled is hot-removed, and we hot-add a
device, I think LTR will not work on the new device.  Possibly also a
bug, although I'm not convinced we know how to configure LTR on the
new device anyway.

So I'd *like* to merge the bug fix for v5.12, but I think I'll wait
because of the issue below.

> Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> ---
> changes of v4
>  -fix typo of commit message
>  -rename: pci_reconfigure_bridge_ltr()->pci_bridge_reconfigure_ltr()
> changes of v3
>  -call pci_reconfigure_bridge_ltr() in probe.c
> changes of v2
>  -modify patch description
>  -reconfigure bridge's LTR before restoring device DEVCTL2 register
> ---
>  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
>  drivers/pci/pci.h   |  1 +
>  drivers/pci/probe.c | 13 ++++++++++---
>  3 files changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b9fecc25d213..6bf65d295331 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct pci_dev *dev)
>  	return 0;
>  }
>  
> +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> +{
> +#ifdef CONFIG_PCIEASPM
> +	struct pci_dev *bridge;
> +	u32 ctl;
> +
> +	bridge = pci_upstream_bridge(dev);
> +	if (bridge && bridge->ltr_path) {
> +		pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl);
> +		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> +			pci_dbg(bridge, "re-enabling LTR\n");
> +			pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2,
> +						 PCI_EXP_DEVCTL2_LTR_EN);

This pattern of updating the upstream bridge on behalf of "dev" is
problematic because it's racy:

  CPU 1                     CPU 2
  -------------------       ---------------------
  ctl = read DEVCTL2        ctl = read(DEVCTL2)
  ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
  write(DEVCTL2, ctl)
                            write(DEVCTL2, ctl)

Now the bridge has ARI set, but not LTR_EN.

We have the same problem in the pci_enable_device() path.  The most
recent try at fixing it is [1].

[1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-s.miroshnichenko@yadro.com/

> +		}
> +	}
> +#endif
> +}
> +
>  static void pci_restore_pcie_state(struct pci_dev *dev)
>  {
>  	int i = 0;
> @@ -1447,6 +1465,13 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
>  	if (!save_state)
>  		return;
>  
> +	/*
> +	 * Downstream ports reset the LTR enable bit when link goes down.
> +	 * Check and re-configure the bit here before restoring device.
> +	 * PCIe r5.0, sec 7.5.3.16.
> +	 */
> +	pci_bridge_reconfigure_ltr(dev);
> +
>  	cap = (u16 *)&save_state->cap.data[0];
>  	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, cap[i++]);
>  	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 5c59365092fa..b3a5e5287cb7 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct pci_dev *dev);
>  bool pci_bridge_d3_possible(struct pci_dev *dev);
>  void pci_bridge_d3_update(struct pci_dev *dev);
>  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
>  
>  static inline void pci_wakeup_event(struct pci_dev *dev)
>  {
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 953f15abc850..ade055e9fb58 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct pci_dev *dev)
>  	 * Complex and all intermediate Switches indicate support for LTR.
>  	 * PCIe r4.0, sec 6.18.
>  	 */
> -	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> -	    ((bridge = pci_upstream_bridge(dev)) &&
> -	      bridge->ltr_path)) {
> +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> +		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> +					 PCI_EXP_DEVCTL2_LTR_EN);
> +		dev->ltr_path = 1;
> +		return;
> +	}
> +
> +	bridge = pci_upstream_bridge(dev);
> +	if (bridge && bridge->ltr_path) {
> +		pci_bridge_reconfigure_ltr(dev);
>  		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
>  					 PCI_EXP_DEVCTL2_LTR_EN);
>  		dev->ltr_path = 1;
> -- 
> 2.18.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-02-18 16:50 ` Bjorn Helgaas
@ 2021-03-22  0:41   ` Mingchuang Qiao
  2021-09-06  5:36   ` mingchuang qiao
  1 sibling, 0 replies; 10+ messages in thread
From: Mingchuang Qiao @ 2021-03-22  0:41 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: kerun.zhu, linux-pci, lambert.wang, rjw, linux-kernel,
	matthias.bgg, alex.williamson, linux-mediatek, utkarsh.h.patel,
	haijun.liu, bhelgaas, mika.westerberg, linux-arm-kernel

Hi Bjorn,

On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@mediatek.com wrote:
> > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > 
> > In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2 register is
> > configured in pci_configure_ltr(). If device and bridge both support LTR
> > mechanism, the "LTR Mechanism Enable" bit of device and bridge will be
> > enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as 1.
> > 
> > If PCIe link goes down when device resets, the "LTR Mechanism Enable" bit
> > of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16. However,
> > the pci_dev->ltr_path value of bridge is still 1.
> > 
> > For following conditions, check and re-configure "LTR Mechanism Enable" bit
> > of bridge to make "LTR Mechanism Enable" bit match ltr_path value.
> >    -before configuring device's LTR for hot-remove/hot-add
> >    -before restoring device's DEVCTL2 register when restore device state
> 
> There's definitely a bug here.  The commit log should say a little
> more about what it is.  I *think* if LTR is enabled and we suspend
> (putting the device in D3cold) and resume, LTR probably doesn't work
> after resume because LTR is disabled in the upstream bridge, which
> would be an obvious bug.
> 
> Also, if a device with LTR enabled is hot-removed, and we hot-add a
> device, I think LTR will not work on the new device.  Possibly also a
> bug, although I'm not convinced we know how to configure LTR on the
> new device anyway.
> 
> So I'd *like* to merge the bug fix for v5.12, but I think I'll wait
> because of the issue below.
> 

Thanks for review.
Any further process shall I make to get this patch merged?

> > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > ---
> > changes of v4
> >  -fix typo of commit message
> >  -rename: pci_reconfigure_bridge_ltr()->pci_bridge_reconfigure_ltr()
> > changes of v3
> >  -call pci_reconfigure_bridge_ltr() in probe.c
> > changes of v2
> >  -modify patch description
> >  -reconfigure bridge's LTR before restoring device DEVCTL2 register
> > ---
> >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> >  drivers/pci/pci.h   |  1 +
> >  drivers/pci/probe.c | 13 ++++++++++---
> >  3 files changed, 36 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index b9fecc25d213..6bf65d295331 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct pci_dev *dev)
> >  	return 0;
> >  }
> >  
> > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > +{
> > +#ifdef CONFIG_PCIEASPM
> > +	struct pci_dev *bridge;
> > +	u32 ctl;
> > +
> > +	bridge = pci_upstream_bridge(dev);
> > +	if (bridge && bridge->ltr_path) {
> > +		pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl);
> > +		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > +			pci_dbg(bridge, "re-enabling LTR\n");
> > +			pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2,
> > +						 PCI_EXP_DEVCTL2_LTR_EN);
> 
> This pattern of updating the upstream bridge on behalf of "dev" is
> problematic because it's racy:
> 
>   CPU 1                     CPU 2
>   -------------------       ---------------------
>   ctl = read DEVCTL2        ctl = read(DEVCTL2)
>   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
>   write(DEVCTL2, ctl)
>                             write(DEVCTL2, ctl)
> 
> Now the bridge has ARI set, but not LTR_EN.
> 
> We have the same problem in the pci_enable_device() path.  The most
> recent try at fixing it is [1].
> 
> [1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-s.miroshnichenko@yadro.com/
> 
> > +		}
> > +	}
> > +#endif
> > +}
> > +
> >  static void pci_restore_pcie_state(struct pci_dev *dev)
> >  {
> >  	int i = 0;
> > @@ -1447,6 +1465,13 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
> >  	if (!save_state)
> >  		return;
> >  
> > +	/*
> > +	 * Downstream ports reset the LTR enable bit when link goes down.
> > +	 * Check and re-configure the bit here before restoring device.
> > +	 * PCIe r5.0, sec 7.5.3.16.
> > +	 */
> > +	pci_bridge_reconfigure_ltr(dev);
> > +
> >  	cap = (u16 *)&save_state->cap.data[0];
> >  	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, cap[i++]);
> >  	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index 5c59365092fa..b3a5e5287cb7 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct pci_dev *dev);
> >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> >  void pci_bridge_d3_update(struct pci_dev *dev);
> >  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> >  
> >  static inline void pci_wakeup_event(struct pci_dev *dev)
> >  {
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index 953f15abc850..ade055e9fb58 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct pci_dev *dev)
> >  	 * Complex and all intermediate Switches indicate support for LTR.
> >  	 * PCIe r4.0, sec 6.18.
> >  	 */
> > -	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > -	    ((bridge = pci_upstream_bridge(dev)) &&
> > -	      bridge->ltr_path)) {
> > +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > +		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> > +					 PCI_EXP_DEVCTL2_LTR_EN);
> > +		dev->ltr_path = 1;
> > +		return;
> > +	}
> > +
> > +	bridge = pci_upstream_bridge(dev);
> > +	if (bridge && bridge->ltr_path) {
> > +		pci_bridge_reconfigure_ltr(dev);
> >  		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> >  					 PCI_EXP_DEVCTL2_LTR_EN);
> >  		dev->ltr_path = 1;
> > -- 
> > 2.18.0
> 
> _______________________________________________
> Linux-mediatek mailing list
> Linux-mediatek@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-02-18 16:50 ` Bjorn Helgaas
  2021-03-22  0:41   ` Mingchuang Qiao
@ 2021-09-06  5:36   ` mingchuang qiao
  2021-09-30  7:02     ` mingchuang qiao
  1 sibling, 1 reply; 10+ messages in thread
From: mingchuang qiao @ 2021-09-06  5:36 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: kerun.zhu, linux-pci, lambert.wang, rjw, linux-kernel,
	matthias.bgg, alex.williamson, linux-mediatek, utkarsh.h.patel,
	haijun.liu, bhelgaas, mika.westerberg, linux-arm-kernel,
	mingchuang.qiao

Hi Bjorn,

On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@mediatek.co
> m wrote:
> > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > 
> > In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2
> > register is
> > configured in pci_configure_ltr(). If device and bridge both
> > support LTR
> > mechanism, the "LTR Mechanism Enable" bit of device and bridge will
> > be
> > enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as
> > 1.
> > 
> > If PCIe link goes down when device resets, the "LTR Mechanism
> > Enable" bit
> > of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16.
> > However,
> > the pci_dev->ltr_path value of bridge is still 1.
> > 
> > For following conditions, check and re-configure "LTR Mechanism
> > Enable" bit
> > of bridge to make "LTR Mechanism Enable" bit match ltr_path value.
> >    -before configuring device's LTR for hot-remove/hot-add
> >    -before restoring device's DEVCTL2 register when restore device
> > state
> 
> There's definitely a bug here.  The commit log should say a little
> more about what it is.  I *think* if LTR is enabled and we suspend
> (putting the device in D3cold) and resume, LTR probably doesn't work
> after resume because LTR is disabled in the upstream bridge, which
> would be an obvious bug.
> 
> Also, if a device with LTR enabled is hot-removed, and we hot-add a
> device, I think LTR will not work on the new device.  Possibly also a
> bug, although I'm not convinced we know how to configure LTR on the
> new device anyway.
> 
> So I'd *like* to merge the bug fix for v5.12, but I think I'll wait
> because of the issue below.
> 

A friendly ping.
Any further process shall I make to get this patch merged?

> > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > ---
> > changes of v4
> >  -fix typo of commit message
> >  -rename: pci_reconfigure_bridge_ltr()-
> > >pci_bridge_reconfigure_ltr()
> > changes of v3
> >  -call pci_reconfigure_bridge_ltr() in probe.c
> > changes of v2
> >  -modify patch description
> >  -reconfigure bridge's LTR before restoring device DEVCTL2 register
> > ---
> >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> >  drivers/pci/pci.h   |  1 +
> >  drivers/pci/probe.c | 13 ++++++++++---
> >  3 files changed, 36 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index b9fecc25d213..6bf65d295331 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct
> > pci_dev *dev)
> >  	return 0;
> >  }
> >  
> > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > +{
> > +#ifdef CONFIG_PCIEASPM
> > +	struct pci_dev *bridge;
> > +	u32 ctl;
> > +
> > +	bridge = pci_upstream_bridge(dev);
> > +	if (bridge && bridge->ltr_path) {
> > +		pcie_capability_read_dword(bridge,
> > PCI_EXP_DEVCTL2, &ctl);
> > +		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > +			pci_dbg(bridge, "re-enabling LTR\n");
> > +			pcie_capability_set_word(bridge,
> > PCI_EXP_DEVCTL2,
> > +						 PCI_EXP_DEVCTL2_L
> > TR_EN);
> 
> This pattern of updating the upstream bridge on behalf of "dev" is
> problematic because it's racy:
> 
>   CPU 1                     CPU 2
>   -------------------       ---------------------
>   ctl = read DEVCTL2        ctl = read(DEVCTL2)
>   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
>   write(DEVCTL2, ctl)
>                             write(DEVCTL2, ctl)
> 
> Now the bridge has ARI set, but not LTR_EN.
> 
> We have the same problem in the pci_enable_device() path.  The most
> recent try at fixing it is [1].
> 
> [1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-s.miros
> hnichenko@yadro.com/
> 
> > +		}
> > +	}
> > +#endif
> > +}
> > +
> >  static void pci_restore_pcie_state(struct pci_dev *dev)
> >  {
> >  	int i = 0;
> > @@ -1447,6 +1465,13 @@ static void pci_restore_pcie_state(struct
> > pci_dev *dev)
> >  	if (!save_state)
> >  		return;
> >  
> > +	/*
> > +	 * Downstream ports reset the LTR enable bit when link
> > goes down.
> > +	 * Check and re-configure the bit here before restoring
> > device.
> > +	 * PCIe r5.0, sec 7.5.3.16.
> > +	 */
> > +	pci_bridge_reconfigure_ltr(dev);
> > +
> >  	cap = (u16 *)&save_state->cap.data[0];
> >  	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, cap[i++]);
> >  	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index 5c59365092fa..b3a5e5287cb7 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct pci_dev
> > *dev);
> >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> >  void pci_bridge_d3_update(struct pci_dev *dev);
> >  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> >  
> >  static inline void pci_wakeup_event(struct pci_dev *dev)
> >  {
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index 953f15abc850..ade055e9fb58 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct pci_dev
> > *dev)
> >  	 * Complex and all intermediate Switches indicate support
> > for LTR.
> >  	 * PCIe r4.0, sec 6.18.
> >  	 */
> > -	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > -	    ((bridge = pci_upstream_bridge(dev)) &&
> > -	      bridge->ltr_path)) {
> > +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > +		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> > +					 PCI_EXP_DEVCTL2_LTR_EN);
> > +		dev->ltr_path = 1;
> > +		return;
> > +	}
> > +
> > +	bridge = pci_upstream_bridge(dev);
> > +	if (bridge && bridge->ltr_path) {
> > +		pci_bridge_reconfigure_ltr(dev);
> >  		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> >  					 PCI_EXP_DEVCTL2_LTR_EN);
> >  		dev->ltr_path = 1;
> > -- 
> > 2.18.0
> 
> _______________________________________________
> Linux-mediatek mailing list
> Linux-mediatek@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-09-06  5:36   ` mingchuang qiao
@ 2021-09-30  7:02     ` mingchuang qiao
  2021-09-30 19:48       ` Bjorn Helgaas
  0 siblings, 1 reply; 10+ messages in thread
From: mingchuang qiao @ 2021-09-30  7:02 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: kerun.zhu, linux-pci, lambert.wang, rjw, linux-kernel,
	matthias.bgg, alex.williamson, linux-mediatek, utkarsh.h.patel,
	haijun.liu, mingchuang.qiao, bhelgaas, mika.westerberg,
	linux-arm-kernel

Hi Bjorn,

A friendly ping.
Thanks.

On Mon, 2021-09-06 at 13:36 +0800, mingchuang qiao wrote:
> Hi Bjorn,
> 
> On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> > On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@mediatek.
> > co
> > m wrote:
> > > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > 
> > > In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2
> > > register is
> > > configured in pci_configure_ltr(). If device and bridge both
> > > support LTR
> > > mechanism, the "LTR Mechanism Enable" bit of device and bridge
> > > will
> > > be
> > > enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as
> > > 1.
> > > 
> > > If PCIe link goes down when device resets, the "LTR Mechanism
> > > Enable" bit
> > > of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16.
> > > However,
> > > the pci_dev->ltr_path value of bridge is still 1.
> > > 
> > > For following conditions, check and re-configure "LTR Mechanism
> > > Enable" bit
> > > of bridge to make "LTR Mechanism Enable" bit match ltr_path
> > > value.
> > >    -before configuring device's LTR for hot-remove/hot-add
> > >    -before restoring device's DEVCTL2 register when restore
> > > device
> > > state
> > 
> > There's definitely a bug here.  The commit log should say a little
> > more about what it is.  I *think* if LTR is enabled and we suspend
> > (putting the device in D3cold) and resume, LTR probably doesn't
> > work
> > after resume because LTR is disabled in the upstream bridge, which
> > would be an obvious bug.
> > 
> > Also, if a device with LTR enabled is hot-removed, and we hot-add a
> > device, I think LTR will not work on the new device.  Possibly also
> > a
> > bug, although I'm not convinced we know how to configure LTR on the
> > new device anyway.
> > 
> > So I'd *like* to merge the bug fix for v5.12, but I think I'll wait
> > because of the issue below.
> > 
> 
> A friendly ping.
> Any further process shall I make to get this patch merged?
> 
> > > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > ---
> > > changes of v4
> > >  -fix typo of commit message
> > >  -rename: pci_reconfigure_bridge_ltr()-
> > > > pci_bridge_reconfigure_ltr()
> > > 
> > > changes of v3
> > >  -call pci_reconfigure_bridge_ltr() in probe.c
> > > changes of v2
> > >  -modify patch description
> > >  -reconfigure bridge's LTR before restoring device DEVCTL2
> > > register
> > > ---
> > >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> > >  drivers/pci/pci.h   |  1 +
> > >  drivers/pci/probe.c | 13 ++++++++++---
> > >  3 files changed, 36 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > > index b9fecc25d213..6bf65d295331 100644
> > > --- a/drivers/pci/pci.c
> > > +++ b/drivers/pci/pci.c
> > > @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct
> > > pci_dev *dev)
> > >  	return 0;
> > >  }
> > >  
> > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > > +{
> > > +#ifdef CONFIG_PCIEASPM
> > > +	struct pci_dev *bridge;
> > > +	u32 ctl;
> > > +
> > > +	bridge = pci_upstream_bridge(dev);
> > > +	if (bridge && bridge->ltr_path) {
> > > +		pcie_capability_read_dword(bridge,
> > > PCI_EXP_DEVCTL2, &ctl);
> > > +		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > > +			pci_dbg(bridge, "re-enabling LTR\n");
> > > +			pcie_capability_set_word(bridge,
> > > PCI_EXP_DEVCTL2,
> > > +						 PCI_EXP_DEVCTL2
> > > _L
> > > TR_EN);
> > 
> > This pattern of updating the upstream bridge on behalf of "dev" is
> > problematic because it's racy:
> > 
> >   CPU 1                     CPU 2
> >   -------------------       ---------------------
> >   ctl = read DEVCTL2        ctl = read(DEVCTL2)
> >   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
> >   write(DEVCTL2, ctl)
> >                             write(DEVCTL2, ctl)
> > 
> > Now the bridge has ARI set, but not LTR_EN.
> > 
> > We have the same problem in the pci_enable_device() path.  The most
> > recent try at fixing it is [1].
> > 
> > [1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-s.mir
> > os
> > hnichenko@yadro.com/
> > 
> > > +		}
> > > +	}
> > > +#endif
> > > +}
> > > +
> > >  static void pci_restore_pcie_state(struct pci_dev *dev)
> > >  {
> > >  	int i = 0;
> > > @@ -1447,6 +1465,13 @@ static void pci_restore_pcie_state(struct
> > > pci_dev *dev)
> > >  	if (!save_state)
> > >  		return;
> > >  
> > > +	/*
> > > +	 * Downstream ports reset the LTR enable bit when link
> > > goes down.
> > > +	 * Check and re-configure the bit here before restoring
> > > device.
> > > +	 * PCIe r5.0, sec 7.5.3.16.
> > > +	 */
> > > +	pci_bridge_reconfigure_ltr(dev);
> > > +
> > >  	cap = (u16 *)&save_state->cap.data[0];
> > >  	pcie_capability_write_word(dev, PCI_EXP_DEVCTL,
> > > cap[i++]);
> > >  	pcie_capability_write_word(dev, PCI_EXP_LNKCTL,
> > > cap[i++]);
> > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > index 5c59365092fa..b3a5e5287cb7 100644
> > > --- a/drivers/pci/pci.h
> > > +++ b/drivers/pci/pci.h
> > > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct pci_dev
> > > *dev);
> > >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> > >  void pci_bridge_d3_update(struct pci_dev *dev);
> > >  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> > >  
> > >  static inline void pci_wakeup_event(struct pci_dev *dev)
> > >  {
> > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > index 953f15abc850..ade055e9fb58 100644
> > > --- a/drivers/pci/probe.c
> > > +++ b/drivers/pci/probe.c
> > > @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct
> > > pci_dev
> > > *dev)
> > >  	 * Complex and all intermediate Switches indicate
> > > support
> > > for LTR.
> > >  	 * PCIe r4.0, sec 6.18.
> > >  	 */
> > > -	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > > -	    ((bridge = pci_upstream_bridge(dev)) &&
> > > -	      bridge->ltr_path)) {
> > > +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > > +		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> > > +					 PCI_EXP_DEVCTL2_LTR_EN)
> > > ;
> > > +		dev->ltr_path = 1;
> > > +		return;
> > > +	}
> > > +
> > > +	bridge = pci_upstream_bridge(dev);
> > > +	if (bridge && bridge->ltr_path) {
> > > +		pci_bridge_reconfigure_ltr(dev);
> > >  		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> > >  					 PCI_EXP_DEVCTL2_LTR_EN)
> > > ;
> > >  		dev->ltr_path = 1;
> > > -- 
> > > 2.18.0
> > 
> > _______________________________________________
> > Linux-mediatek mailing list
> > Linux-mediatek@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-mediatek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-09-30  7:02     ` mingchuang qiao
@ 2021-09-30 19:48       ` Bjorn Helgaas
  2021-10-08  6:30         ` mingchuang qiao
  0 siblings, 1 reply; 10+ messages in thread
From: Bjorn Helgaas @ 2021-09-30 19:48 UTC (permalink / raw)
  To: mingchuang qiao
  Cc: kerun.zhu, linux-pci, lambert.wang, rjw, linux-kernel,
	matthias.bgg, alex.williamson, linux-mediatek, utkarsh.h.patel,
	haijun.liu, bhelgaas, mika.westerberg, linux-arm-kernel

On Thu, Sep 30, 2021 at 03:02:24PM +0800, mingchuang qiao wrote:
> Hi Bjorn,
> 
> A friendly ping.
> Thanks.

I pointed out a couple issues, but you never responded.  See below.

> On Mon, 2021-09-06 at 13:36 +0800, mingchuang qiao wrote:
> > Hi Bjorn,
> > 
> > On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> > > On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@mediatek.
> > > co
> > > m wrote:
> > > > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > 
> > > > In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2
> > > > register is
> > > > configured in pci_configure_ltr(). If device and bridge both
> > > > support LTR
> > > > mechanism, the "LTR Mechanism Enable" bit of device and bridge
> > > > will
> > > > be
> > > > enabled in DEVCTL2 register. And pci_dev->ltr_path will be set as
> > > > 1.
> > > > 
> > > > If PCIe link goes down when device resets, the "LTR Mechanism
> > > > Enable" bit
> > > > of bridge will change to 0 according to PCIe r5.0, sec 7.5.3.16.
> > > > However,
> > > > the pci_dev->ltr_path value of bridge is still 1.
> > > > 
> > > > For following conditions, check and re-configure "LTR Mechanism
> > > > Enable" bit
> > > > of bridge to make "LTR Mechanism Enable" bit match ltr_path
> > > > value.
> > > >    -before configuring device's LTR for hot-remove/hot-add
> > > >    -before restoring device's DEVCTL2 register when restore
> > > > device
> > > > state
> > > 
> > > There's definitely a bug here.  The commit log should say a little
> > > more about what it is.  I *think* if LTR is enabled and we suspend
> > > (putting the device in D3cold) and resume, LTR probably doesn't
> > > work
> > > after resume because LTR is disabled in the upstream bridge, which
> > > would be an obvious bug.

Here's one thing.  Above I was asking for more details.  In
particular, how would a user notice this bug?  How did *you* notice
the bug?

> > > Also, if a device with LTR enabled is hot-removed, and we hot-add a
> > > device, I think LTR will not work on the new device.  Possibly also
> > > a
> > > bug, although I'm not convinced we know how to configure LTR on the
> > > new device anyway.
> > > 
> > > So I'd *like* to merge the bug fix for v5.12, but I think I'll wait
> > > because of the issue below.
> > > 
> > 
> > A friendly ping.
> > Any further process shall I make to get this patch merged?
> > 
> > > > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > ---
> > > > changes of v4
> > > >  -fix typo of commit message
> > > >  -rename: pci_reconfigure_bridge_ltr()-
> > > > > pci_bridge_reconfigure_ltr()
> > > > 
> > > > changes of v3
> > > >  -call pci_reconfigure_bridge_ltr() in probe.c
> > > > changes of v2
> > > >  -modify patch description
> > > >  -reconfigure bridge's LTR before restoring device DEVCTL2
> > > > register
> > > > ---
> > > >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> > > >  drivers/pci/pci.h   |  1 +
> > > >  drivers/pci/probe.c | 13 ++++++++++---
> > > >  3 files changed, 36 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > > > index b9fecc25d213..6bf65d295331 100644
> > > > --- a/drivers/pci/pci.c
> > > > +++ b/drivers/pci/pci.c
> > > > @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct
> > > > pci_dev *dev)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > > > +{
> > > > +#ifdef CONFIG_PCIEASPM
> > > > +	struct pci_dev *bridge;
> > > > +	u32 ctl;
> > > > +
> > > > +	bridge = pci_upstream_bridge(dev);
> > > > +	if (bridge && bridge->ltr_path) {
> > > > +		pcie_capability_read_dword(bridge,
> > > > PCI_EXP_DEVCTL2, &ctl);
> > > > +		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > > > +			pci_dbg(bridge, "re-enabling LTR\n");
> > > > +			pcie_capability_set_word(bridge,
> > > > PCI_EXP_DEVCTL2,
> > > > +						 PCI_EXP_DEVCTL2
> > > > _L
> > > > TR_EN);
> > > 
> > > This pattern of updating the upstream bridge on behalf of "dev" is
> > > problematic because it's racy:
> > > 
> > >   CPU 1                     CPU 2
> > >   -------------------       ---------------------
> > >   ctl = read DEVCTL2        ctl = read(DEVCTL2)
> > >   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
> > >   write(DEVCTL2, ctl)
> > >                             write(DEVCTL2, ctl)
> > > 
> > > Now the bridge has ARI set, but not LTR_EN.
> > > 
> > > We have the same problem in the pci_enable_device() path.  The most
> > > recent try at fixing it is [1].

I was hoping you would respond with "yes, I understand the problem,
but don't think it's likely" or "no, this isn't actually a problem
because ..."

I think it *is* a problem, but we're probably unlikely to hit it, so
we can probably live with it for now.  

> > > [1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-s.mir
> > > os
> > > hnichenko@yadro.com/
> > > 
> > > > +		}
> > > > +	}
> > > > +#endif
> > > > +}
> > > > +
> > > >  static void pci_restore_pcie_state(struct pci_dev *dev)
> > > >  {
> > > >  	int i = 0;
> > > > @@ -1447,6 +1465,13 @@ static void pci_restore_pcie_state(struct
> > > > pci_dev *dev)
> > > >  	if (!save_state)
> > > >  		return;
> > > >  
> > > > +	/*
> > > > +	 * Downstream ports reset the LTR enable bit when link
> > > > goes down.
> > > > +	 * Check and re-configure the bit here before restoring
> > > > device.
> > > > +	 * PCIe r5.0, sec 7.5.3.16.
> > > > +	 */
> > > > +	pci_bridge_reconfigure_ltr(dev);
> > > > +
> > > >  	cap = (u16 *)&save_state->cap.data[0];
> > > >  	pcie_capability_write_word(dev, PCI_EXP_DEVCTL,
> > > > cap[i++]);
> > > >  	pcie_capability_write_word(dev, PCI_EXP_LNKCTL,
> > > > cap[i++]);
> > > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > > index 5c59365092fa..b3a5e5287cb7 100644
> > > > --- a/drivers/pci/pci.h
> > > > +++ b/drivers/pci/pci.h
> > > > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct pci_dev
> > > > *dev);
> > > >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> > > >  void pci_bridge_d3_update(struct pci_dev *dev);
> > > >  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> > > >  
> > > >  static inline void pci_wakeup_event(struct pci_dev *dev)
> > > >  {
> > > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > > index 953f15abc850..ade055e9fb58 100644
> > > > --- a/drivers/pci/probe.c
> > > > +++ b/drivers/pci/probe.c
> > > > @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct
> > > > pci_dev
> > > > *dev)
> > > >  	 * Complex and all intermediate Switches indicate
> > > > support
> > > > for LTR.
> > > >  	 * PCIe r4.0, sec 6.18.
> > > >  	 */
> > > > -	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > > > -	    ((bridge = pci_upstream_bridge(dev)) &&
> > > > -	      bridge->ltr_path)) {
> > > > +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > > > +		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> > > > +					 PCI_EXP_DEVCTL2_LTR_EN)
> > > > ;
> > > > +		dev->ltr_path = 1;
> > > > +		return;
> > > > +	}
> > > > +
> > > > +	bridge = pci_upstream_bridge(dev);
> > > > +	if (bridge && bridge->ltr_path) {
> > > > +		pci_bridge_reconfigure_ltr(dev);
> > > >  		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
> > > >  					 PCI_EXP_DEVCTL2_LTR_EN)
> > > > ;
> > > >  		dev->ltr_path = 1;
> > > > -- 
> > > > 2.18.0
> > > 
> > > _______________________________________________
> > > Linux-mediatek mailing list
> > > Linux-mediatek@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-mediatek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-09-30 19:48       ` Bjorn Helgaas
@ 2021-10-08  6:30         ` mingchuang qiao
  2021-10-12  2:48           ` Rajat Jain
  0 siblings, 1 reply; 10+ messages in thread
From: mingchuang qiao @ 2021-10-08  6:30 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: kerun.zhu, mingchuang.qiao, linux-pci, lambert.wang, rjw,
	linux-kernel, matthias.bgg, alex.williamson, linux-mediatek,
	utkarsh.h.patel, haijun.liu, bhelgaas, mika.westerberg,
	linux-arm-kernel

Hi Bjorn,

Much appreciate the comments. See below for my response.

On Thu, 2021-09-30 at 14:48 -0500, Bjorn Helgaas wrote:
> On Thu, Sep 30, 2021 at 03:02:24PM +0800, mingchuang qiao wrote:
> > Hi Bjorn,
> > 
> > A friendly ping.
> > Thanks.
> 
> I pointed out a couple issues, but you never responded.  See below.
> 
> > On Mon, 2021-09-06 at 13:36 +0800, mingchuang qiao wrote:
> > > Hi Bjorn,
> > > 
> > > On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> > > > On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@media
> > > > tek.
> > > > co
> > > > m wrote:
> > > > > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > > 
> > > > > In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2
> > > > > register is
> > > > > configured in pci_configure_ltr(). If device and bridge both
> > > > > support LTR
> > > > > mechanism, the "LTR Mechanism Enable" bit of device and
> > > > > bridge
> > > > > will
> > > > > be
> > > > > enabled in DEVCTL2 register. And pci_dev->ltr_path will be
> > > > > set as
> > > > > 1.
> > > > > 
> > > > > If PCIe link goes down when device resets, the "LTR Mechanism
> > > > > Enable" bit
> > > > > of bridge will change to 0 according to PCIe r5.0, sec
> > > > > 7.5.3.16.
> > > > > However,
> > > > > the pci_dev->ltr_path value of bridge is still 1.
> > > > > 
> > > > > For following conditions, check and re-configure "LTR
> > > > > Mechanism
> > > > > Enable" bit
> > > > > of bridge to make "LTR Mechanism Enable" bit match ltr_path
> > > > > value.
> > > > >    -before configuring device's LTR for hot-remove/hot-add
> > > > >    -before restoring device's DEVCTL2 register when restore
> > > > > device
> > > > > state
> > > > 
> > > > There's definitely a bug here.  The commit log should say a
> > > > little
> > > > more about what it is.  I *think* if LTR is enabled and we
> > > > suspend
> > > > (putting the device in D3cold) and resume, LTR probably doesn't
> > > > work
> > > > after resume because LTR is disabled in the upstream bridge,
> > > > which
> > > > would be an obvious bug.
> 
> Here's one thing.  Above I was asking for more details.  In
> particular, how would a user notice this bug?  How did *you* notice
> the bug?
> 

I will update more details in the commit log. 

For the suspend(D3 cold) and resume case, the LTR enable bit value
of bridge is saved(by pci_save_state()) in suspend flow and restored(by
pci_restore_state()) in resume flow. 
  -If link goes down after bridge already does pci_save_state()
    LTR could work after resume due to pci_restore_state() will enable
the LTR of bridge.
  -If link goes down before bridge does pci_save_state()
    LTR probably doesn't work after resume due to the LTR bit is
already disable when pci_save_state() and will not enable after
pci_restore_sate().

The sequence of link goes down and brdige suspend maybe platform
specific.
 
The issue is noticed by AER log as following shows.

pcieport 0000:00:1d.0: AER: Uncorrected (Non-Fatal) error received:
id=00e8
pcieport 0000:00:1d.0: PCIe Bus Error: severity=Uncorrected (Non-
Fatal), type=Transaction Layer, id=00e8(Requester ID)
pcieport 0000:00:1d.0:   device [8086:9d18] error
status/mask=00100000/00010000
pcieport 0000:00:1d.0:    [20] Unsupported Request    (First)
pcieport 0000:00:1d.0:   TLP Header: 34000000 03000010 00000000
00000000

> > > > Also, if a device with LTR enabled is hot-removed, and we hot-
> > > > add a
> > > > device, I think LTR will not work on the new device.  Possibly
> > > > also
> > > > a
> > > > bug, although I'm not convinced we know how to configure LTR on
> > > > the
> > > > new device anyway.
> > > > 
> > > > So I'd *like* to merge the bug fix for v5.12, but I think I'll
> > > > wait
> > > > because of the issue below.
> > > > 
> > > 
> > > A friendly ping.
> > > Any further process shall I make to get this patch merged?
> > > 
> > > > > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > > ---
> > > > > changes of v4
> > > > >  -fix typo of commit message
> > > > >  -rename: pci_reconfigure_bridge_ltr()-
> > > > > > pci_bridge_reconfigure_ltr()
> > > > > 
> > > > > changes of v3
> > > > >  -call pci_reconfigure_bridge_ltr() in probe.c
> > > > > changes of v2
> > > > >  -modify patch description
> > > > >  -reconfigure bridge's LTR before restoring device DEVCTL2
> > > > > register
> > > > > ---
> > > > >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> > > > >  drivers/pci/pci.h   |  1 +
> > > > >  drivers/pci/probe.c | 13 ++++++++++---
> > > > >  3 files changed, 36 insertions(+), 3 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > > > > index b9fecc25d213..6bf65d295331 100644
> > > > > --- a/drivers/pci/pci.c
> > > > > +++ b/drivers/pci/pci.c
> > > > > @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct
> > > > > pci_dev *dev)
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > > > > +{
> > > > > +#ifdef CONFIG_PCIEASPM
> > > > > +	struct pci_dev *bridge;
> > > > > +	u32 ctl;
> > > > > +
> > > > > +	bridge = pci_upstream_bridge(dev);
> > > > > +	if (bridge && bridge->ltr_path) {
> > > > > +		pcie_capability_read_dword(bridge,
> > > > > PCI_EXP_DEVCTL2, &ctl);
> > > > > +		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > > > > +			pci_dbg(bridge, "re-enabling
> > > > > LTR\n");
> > > > > +			pcie_capability_set_word(bridge,
> > > > > PCI_EXP_DEVCTL2,
> > > > > +						 PCI_EXP_DEV
> > > > > CTL2
> > > > > _L
> > > > > TR_EN);
> > > > 
> > > > This pattern of updating the upstream bridge on behalf of "dev"
> > > > is
> > > > problematic because it's racy:
> > > > 
> > > >   CPU 1                     CPU 2
> > > >   -------------------       ---------------------
> > > >   ctl = read DEVCTL2        ctl = read(DEVCTL2)
> > > >   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
> > > >   write(DEVCTL2, ctl)
> > > >                             write(DEVCTL2, ctl)
> > > > 
> > > > Now the bridge has ARI set, but not LTR_EN.
> > > > 
> > > > We have the same problem in the pci_enable_device() path.  The
> > > > most
> > > > recent try at fixing it is [1].
> 
> I was hoping you would respond with "yes, I understand the problem,
> but don't think it's likely" or "no, this isn't actually a problem
> because ..."
> 
> I think it *is* a problem, but we're probably unlikely to hit it, so
> we can probably live with it for now.  
> 

Yes, I understand the problem. I also think it unlikely to hit and we
can probably live with it for now.
Thanks. 

> > > > [1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-
> > > > s.mir
> > > > os
> > > > hnichenko@yadro.com/
> > > > 
> > > > > +		}
> > > > > +	}
> > > > > +#endif
> > > > > +}
> > > > > +
> > > > >  static void pci_restore_pcie_state(struct pci_dev *dev)
> > > > >  {
> > > > >  	int i = 0;
> > > > > @@ -1447,6 +1465,13 @@ static void
> > > > > pci_restore_pcie_state(struct
> > > > > pci_dev *dev)
> > > > >  	if (!save_state)
> > > > >  		return;
> > > > >  
> > > > > +	/*
> > > > > +	 * Downstream ports reset the LTR enable bit when
> > > > > link
> > > > > goes down.
> > > > > +	 * Check and re-configure the bit here before
> > > > > restoring
> > > > > device.
> > > > > +	 * PCIe r5.0, sec 7.5.3.16.
> > > > > +	 */
> > > > > +	pci_bridge_reconfigure_ltr(dev);
> > > > > +
> > > > >  	cap = (u16 *)&save_state->cap.data[0];
> > > > >  	pcie_capability_write_word(dev, PCI_EXP_DEVCTL,
> > > > > cap[i++]);
> > > > >  	pcie_capability_write_word(dev, PCI_EXP_LNKCTL,
> > > > > cap[i++]);
> > > > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > > > index 5c59365092fa..b3a5e5287cb7 100644
> > > > > --- a/drivers/pci/pci.h
> > > > > +++ b/drivers/pci/pci.h
> > > > > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct
> > > > > pci_dev
> > > > > *dev);
> > > > >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> > > > >  void pci_bridge_d3_update(struct pci_dev *dev);
> > > > >  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> > > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> > > > >  
> > > > >  static inline void pci_wakeup_event(struct pci_dev *dev)
> > > > >  {
> > > > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > > > index 953f15abc850..ade055e9fb58 100644
> > > > > --- a/drivers/pci/probe.c
> > > > > +++ b/drivers/pci/probe.c
> > > > > @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct
> > > > > pci_dev
> > > > > *dev)
> > > > >  	 * Complex and all intermediate Switches indicate
> > > > > support
> > > > > for LTR.
> > > > >  	 * PCIe r4.0, sec 6.18.
> > > > >  	 */
> > > > > -	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > > > > -	    ((bridge = pci_upstream_bridge(dev)) &&
> > > > > -	      bridge->ltr_path)) {
> > > > > +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > > > > +		pcie_capability_set_word(dev,
> > > > > PCI_EXP_DEVCTL2,
> > > > > +					 PCI_EXP_DEVCTL2_LTR
> > > > > _EN)
> > > > > ;
> > > > > +		dev->ltr_path = 1;
> > > > > +		return;
> > > > > +	}
> > > > > +
> > > > > +	bridge = pci_upstream_bridge(dev);
> > > > > +	if (bridge && bridge->ltr_path) {
> > > > > +		pci_bridge_reconfigure_ltr(dev);
> > > > >  		pcie_capability_set_word(dev,
> > > > > PCI_EXP_DEVCTL2,
> > > > >  					 PCI_EXP_DEVCTL2_LTR
> > > > > _EN)
> > > > > ;
> > > > >  		dev->ltr_path = 1;
> > > > > -- 
> > > > > 2.18.0
> > > > 
> > > > _______________________________________________
> > > > Linux-mediatek mailing list
> > > > Linux-mediatek@lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/linux-mediatek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-10-08  6:30         ` mingchuang qiao
@ 2021-10-12  2:48           ` Rajat Jain
  2021-10-12  8:01             ` Mingchuang Qiao
  0 siblings, 1 reply; 10+ messages in thread
From: Rajat Jain @ 2021-10-12  2:48 UTC (permalink / raw)
  To: mingchuang qiao
  Cc: Bjorn Helgaas, kerun.zhu, linux-pci, lambert.wang, rjw,
	linux-kernel, matthias.bgg, alex.williamson, linux-mediatek,
	utkarsh.h.patel, haijun.liu, bhelgaas, mika.westerberg,
	linux-arm-kernel

Hello,


On Thu, Oct 7, 2021 at 11:30 PM mingchuang qiao
<mingchuang.qiao@mediatek.com> wrote:
>
> Hi Bjorn,
>
> Much appreciate the comments. See below for my response.
>
> On Thu, 2021-09-30 at 14:48 -0500, Bjorn Helgaas wrote:
> > On Thu, Sep 30, 2021 at 03:02:24PM +0800, mingchuang qiao wrote:
> > > Hi Bjorn,
> > >
> > > A friendly ping.
> > > Thanks.
> >
> > I pointed out a couple issues, but you never responded.  See below.
> >
> > > On Mon, 2021-09-06 at 13:36 +0800, mingchuang qiao wrote:
> > > > Hi Bjorn,
> > > >
> > > > On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> > > > > On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@media
> > > > > tek.
> > > > > co
> > > > > m wrote:
> > > > > > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > > >
> > > > > > In bus scan flow, the "LTR Mechanism Enable" bit of DEVCTL2
> > > > > > register is
> > > > > > configured in pci_configure_ltr(). If device and bridge both
> > > > > > support LTR
> > > > > > mechanism, the "LTR Mechanism Enable" bit of device and
> > > > > > bridge
> > > > > > will
> > > > > > be
> > > > > > enabled in DEVCTL2 register. And pci_dev->ltr_path will be
> > > > > > set as
> > > > > > 1.
> > > > > >
> > > > > > If PCIe link goes down when device resets, the "LTR Mechanism
> > > > > > Enable" bit
> > > > > > of bridge will change to 0 according to PCIe r5.0, sec
> > > > > > 7.5.3.16.
> > > > > > However,
> > > > > > the pci_dev->ltr_path value of bridge is still 1.
> > > > > >
> > > > > > For following conditions, check and re-configure "LTR
> > > > > > Mechanism
> > > > > > Enable" bit
> > > > > > of bridge to make "LTR Mechanism Enable" bit match ltr_path
> > > > > > value.
> > > > > >    -before configuring device's LTR for hot-remove/hot-add
> > > > > >    -before restoring device's DEVCTL2 register when restore
> > > > > > device
> > > > > > state
> > > > >
> > > > > There's definitely a bug here.  The commit log should say a
> > > > > little
> > > > > more about what it is.  I *think* if LTR is enabled and we
> > > > > suspend
> > > > > (putting the device in D3cold) and resume, LTR probably doesn't
> > > > > work
> > > > > after resume because LTR is disabled in the upstream bridge,
> > > > > which
> > > > > would be an obvious bug.
> >
> > Here's one thing.  Above I was asking for more details.  In
> > particular, how would a user notice this bug?  How did *you* notice
> > the bug?
> >
>
> I will update more details in the commit log.

Mingchuang: Can you please send a revised version of this patch with
enhanced log as Bjorn suggested.

If you'd like, you can add that this problem was also noticed when
PCIe devices (thunderbolt docks) were hot removed from chromebooks,
and then hot-plugged back again. Once hotplugged back, the newer Intel
chromebooks fail to go into S0ix low power state because of this LTR
issue, and this patch fixes that.

Bjorn: this was also proposed earlier (but the patch was never merged) here:
https://patchwork.kernel.org/project/linux-pci/patch/20210114134724.79511-1-mika.westerberg@linux.intel.com/
(It says "superceded", but I couldn't find the patch that superceded
Mika's patch. Perhaps it is *this* patch?)

>
> For the suspend(D3 cold) and resume case, the LTR enable bit value
> of bridge is saved(by pci_save_state()) in suspend flow and restored(by
> pci_restore_state()) in resume flow.
>   -If link goes down after bridge already does pci_save_state()
>     LTR could work after resume due to pci_restore_state() will enable
> the LTR of bridge.
>   -If link goes down before bridge does pci_save_state()
>     LTR probably doesn't work after resume due to the LTR bit is
> already disable when pci_save_state() and will not enable after
> pci_restore_sate().
>
> The sequence of link goes down and brdige suspend maybe platform
> specific.
>
> The issue is noticed by AER log as following shows.
>
> pcieport 0000:00:1d.0: AER: Uncorrected (Non-Fatal) error received:
> id=00e8
> pcieport 0000:00:1d.0: PCIe Bus Error: severity=Uncorrected (Non-
> Fatal), type=Transaction Layer, id=00e8(Requester ID)
> pcieport 0000:00:1d.0:   device [8086:9d18] error
> status/mask=00100000/00010000
> pcieport 0000:00:1d.0:    [20] Unsupported Request    (First)

Yes, this is expected, because an LTR message from a downstream device
shall be treated as unsupported request if LTR is disabled at the
rootport.

> pcieport 0000:00:1d.0:   TLP Header: 34000000 03000010 00000000
> 00000000
>
> > > > > Also, if a device with LTR enabled is hot-removed, and we hot-
> > > > > add a
> > > > > device, I think LTR will not work on the new device.  Possibly
> > > > > also
> > > > > a
> > > > > bug, although I'm not convinced we know how to configure LTR on
> > > > > the
> > > > > new device anyway.
> > > > >
> > > > > So I'd *like* to merge the bug fix for v5.12, but I think I'll
> > > > > wait
> > > > > because of the issue below.
> > > > >
> > > >
> > > > A friendly ping.
> > > > Any further process shall I make to get this patch merged?
> > > >
> > > > > > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > > > ---
> > > > > > changes of v4
> > > > > >  -fix typo of commit message
> > > > > >  -rename: pci_reconfigure_bridge_ltr()-
> > > > > > > pci_bridge_reconfigure_ltr()
> > > > > >
> > > > > > changes of v3
> > > > > >  -call pci_reconfigure_bridge_ltr() in probe.c
> > > > > > changes of v2
> > > > > >  -modify patch description
> > > > > >  -reconfigure bridge's LTR before restoring device DEVCTL2
> > > > > > register
> > > > > > ---
> > > > > >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> > > > > >  drivers/pci/pci.h   |  1 +
> > > > > >  drivers/pci/probe.c | 13 ++++++++++---
> > > > > >  3 files changed, 36 insertions(+), 3 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > > > > > index b9fecc25d213..6bf65d295331 100644
> > > > > > --- a/drivers/pci/pci.c
> > > > > > +++ b/drivers/pci/pci.c
> > > > > > @@ -1437,6 +1437,24 @@ static int pci_save_pcie_state(struct
> > > > > > pci_dev *dev)
> > > > > >       return 0;
> > > > > >  }
> > > > > >
> > > > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > > > > > +{
> > > > > > +#ifdef CONFIG_PCIEASPM
> > > > > > +     struct pci_dev *bridge;
> > > > > > +     u32 ctl;
> > > > > > +
> > > > > > +     bridge = pci_upstream_bridge(dev);
> > > > > > +     if (bridge && bridge->ltr_path) {
> > > > > > +             pcie_capability_read_dword(bridge,
> > > > > > PCI_EXP_DEVCTL2, &ctl);
> > > > > > +             if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > > > > > +                     pci_dbg(bridge, "re-enabling
> > > > > > LTR\n");
> > > > > > +                     pcie_capability_set_word(bridge,
> > > > > > PCI_EXP_DEVCTL2,
> > > > > > +                                              PCI_EXP_DEV
> > > > > > CTL2
> > > > > > _L
> > > > > > TR_EN);
> > > > >
> > > > > This pattern of updating the upstream bridge on behalf of "dev"
> > > > > is
> > > > > problematic because it's racy:
> > > > >
> > > > >   CPU 1                     CPU 2
> > > > >   -------------------       ---------------------
> > > > >   ctl = read DEVCTL2        ctl = read(DEVCTL2)
> > > > >   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
> > > > >   write(DEVCTL2, ctl)
> > > > >                             write(DEVCTL2, ctl)
> > > > >
> > > > > Now the bridge has ARI set, but not LTR_EN.
> > > > >
> > > > > We have the same problem in the pci_enable_device() path.  The
> > > > > most
> > > > > recent try at fixing it is [1].
> >
> > I was hoping you would respond with "yes, I understand the problem,
> > but don't think it's likely" or "no, this isn't actually a problem
> > because ..."
> >
> > I think it *is* a problem, but we're probably unlikely to hit it, so
> > we can probably live with it for now.
> >
>
> Yes, I understand the problem. I also think it unlikely to hit and we
> can probably live with it for now.
> Thanks.

Given that LTR applies to only PCI Express devices, and 2 of such
devices cannot be simultaneously hot-added under the same parent, I
think it is highly unlikely to hit.
I agree that it is a problem in general though. But It doesn't look
like we are not any close to a merging the other patch series Bjorn
pointed out (https://lore.kernel.org/linux-pci/20201218174011.340514-2-s.miroshnichenko@yadro.com/).

So perhaps we could merge this patch, and while this patch may not be
ideal, it helps in fixing the current set of issues seen with hotplug
of thunderbolt devices (which are very noticable on Intel chromebooks
atleast since it prevents them from going into S0ix)?

Thanks,

Rajat



>
> > > > > [1] https://lore.kernel.org/linux-pci/20201218174011.340514-2-
> > > > > s.mir
> > > > > os
> > > > > hnichenko@yadro.com/
> > > > >
> > > > > > +             }
> > > > > > +     }
> > > > > > +#endif
> > > > > > +}
> > > > > > +
> > > > > >  static void pci_restore_pcie_state(struct pci_dev *dev)
> > > > > >  {
> > > > > >       int i = 0;
> > > > > > @@ -1447,6 +1465,13 @@ static void
> > > > > > pci_restore_pcie_state(struct
> > > > > > pci_dev *dev)
> > > > > >       if (!save_state)
> > > > > >               return;
> > > > > >
> > > > > > +     /*
> > > > > > +      * Downstream ports reset the LTR enable bit when
> > > > > > link
> > > > > > goes down.
> > > > > > +      * Check and re-configure the bit here before
> > > > > > restoring
> > > > > > device.
> > > > > > +      * PCIe r5.0, sec 7.5.3.16.
> > > > > > +      */
> > > > > > +     pci_bridge_reconfigure_ltr(dev);
> > > > > > +
> > > > > >       cap = (u16 *)&save_state->cap.data[0];
> > > > > >       pcie_capability_write_word(dev, PCI_EXP_DEVCTL,
> > > > > > cap[i++]);
> > > > > >       pcie_capability_write_word(dev, PCI_EXP_LNKCTL,
> > > > > > cap[i++]);
> > > > > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > > > > index 5c59365092fa..b3a5e5287cb7 100644
> > > > > > --- a/drivers/pci/pci.h
> > > > > > +++ b/drivers/pci/pci.h
> > > > > > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct
> > > > > > pci_dev
> > > > > > *dev);
> > > > > >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> > > > > >  void pci_bridge_d3_update(struct pci_dev *dev);
> > > > > >  void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
> > > > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> > > > > >
> > > > > >  static inline void pci_wakeup_event(struct pci_dev *dev)
> > > > > >  {
> > > > > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > > > > index 953f15abc850..ade055e9fb58 100644
> > > > > > --- a/drivers/pci/probe.c
> > > > > > +++ b/drivers/pci/probe.c
> > > > > > @@ -2132,9 +2132,16 @@ static void pci_configure_ltr(struct
> > > > > > pci_dev
> > > > > > *dev)
> > > > > >        * Complex and all intermediate Switches indicate
> > > > > > support
> > > > > > for LTR.
> > > > > >        * PCIe r4.0, sec 6.18.
> > > > > >        */
> > > > > > -     if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > > > > > -         ((bridge = pci_upstream_bridge(dev)) &&
> > > > > > -           bridge->ltr_path)) {
> > > > > > +     if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > > > > > +             pcie_capability_set_word(dev,
> > > > > > PCI_EXP_DEVCTL2,
> > > > > > +                                      PCI_EXP_DEVCTL2_LTR
> > > > > > _EN)
> > > > > > ;
> > > > > > +             dev->ltr_path = 1;
> > > > > > +             return;
> > > > > > +     }
> > > > > > +
> > > > > > +     bridge = pci_upstream_bridge(dev);
> > > > > > +     if (bridge && bridge->ltr_path) {
> > > > > > +             pci_bridge_reconfigure_ltr(dev);
> > > > > >               pcie_capability_set_word(dev,
> > > > > > PCI_EXP_DEVCTL2,
> > > > > >                                        PCI_EXP_DEVCTL2_LTR
> > > > > > _EN)
> > > > > > ;
> > > > > >               dev->ltr_path = 1;
> > > > > > --
> > > > > > 2.18.0
> > > > >
> > > > > _______________________________________________
> > > > > Linux-mediatek mailing list
> > > > > Linux-mediatek@lists.infradead.org
> > > > > http://lists.infradead.org/mailman/listinfo/linux-mediatek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v4] PCI: Avoid unsync of LTR mechanism configuration
  2021-10-12  2:48           ` Rajat Jain
@ 2021-10-12  8:01             ` Mingchuang Qiao
  0 siblings, 0 replies; 10+ messages in thread
From: Mingchuang Qiao @ 2021-10-12  8:01 UTC (permalink / raw)
  To: Rajat Jain
  Cc: Bjorn Helgaas, kerun.zhu, linux-pci, lambert.wang, rjw,
	linux-kernel, matthias.bgg, alex.williamson, linux-mediatek,
	utkarsh.h.patel, haijun.liu, bhelgaas, mika.westerberg,
	linux-arm-kernel

Hi,

On Mon, 2021-10-11 at 19:48 -0700, Rajat Jain wrote:
> Hello,
> 
> 
> On Thu, Oct 7, 2021 at 11:30 PM mingchuang qiao
> <mingchuang.qiao@mediatek.com> wrote:
> > 
> > Hi Bjorn,
> > 
> > Much appreciate the comments. See below for my response.
> > 
> > On Thu, 2021-09-30 at 14:48 -0500, Bjorn Helgaas wrote:
> > > On Thu, Sep 30, 2021 at 03:02:24PM +0800, mingchuang qiao wrote:
> > > > Hi Bjorn,
> > > > 
> > > > A friendly ping.
> > > > Thanks.
> > > 
> > > I pointed out a couple issues, but you never responded.  See
> > > below.
> > > 
> > > > On Mon, 2021-09-06 at 13:36 +0800, mingchuang qiao wrote:
> > > > > Hi Bjorn,
> > > > > 
> > > > > On Thu, 2021-02-18 at 10:50 -0600, Bjorn Helgaas wrote:
> > > > > > On Thu, Feb 04, 2021 at 05:51:25PM +0800, mingchuang.qiao@m
> > > > > > edia
> > > > > > tek.
> > > > > > co
> > > > > > m wrote:
> > > > > > > From: Mingchuang Qiao <mingchuang.qiao@mediatek.com>
> > > > > > > 
> > > > > > > In bus scan flow, the "LTR Mechanism Enable" bit of
> > > > > > > DEVCTL2
> > > > > > > register is
> > > > > > > configured in pci_configure_ltr(). If device and bridge
> > > > > > > both
> > > > > > > support LTR
> > > > > > > mechanism, the "LTR Mechanism Enable" bit of device and
> > > > > > > bridge
> > > > > > > will
> > > > > > > be
> > > > > > > enabled in DEVCTL2 register. And pci_dev->ltr_path will
> > > > > > > be
> > > > > > > set as
> > > > > > > 1.
> > > > > > > 
> > > > > > > If PCIe link goes down when device resets, the "LTR
> > > > > > > Mechanism
> > > > > > > Enable" bit
> > > > > > > of bridge will change to 0 according to PCIe r5.0, sec
> > > > > > > 7.5.3.16.
> > > > > > > However,
> > > > > > > the pci_dev->ltr_path value of bridge is still 1.
> > > > > > > 
> > > > > > > For following conditions, check and re-configure "LTR
> > > > > > > Mechanism
> > > > > > > Enable" bit
> > > > > > > of bridge to make "LTR Mechanism Enable" bit match
> > > > > > > ltr_path
> > > > > > > value.
> > > > > > >    -before configuring device's LTR for hot-remove/hot-
> > > > > > > add
> > > > > > >    -before restoring device's DEVCTL2 register when
> > > > > > > restore
> > > > > > > device
> > > > > > > state
> > > > > > 
> > > > > > There's definitely a bug here.  The commit log should say a
> > > > > > little
> > > > > > more about what it is.  I *think* if LTR is enabled and we
> > > > > > suspend
> > > > > > (putting the device in D3cold) and resume, LTR probably
> > > > > > doesn't
> > > > > > work
> > > > > > after resume because LTR is disabled in the upstream
> > > > > > bridge,
> > > > > > which
> > > > > > would be an obvious bug.
> > > 
> > > Here's one thing.  Above I was asking for more details.  In
> > > particular, how would a user notice this bug?  How did *you*
> > > notice
> > > the bug?
> > > 
> > 
> > I will update more details in the commit log.
> 
> Mingchuang: Can you please send a revised version of this patch with
> enhanced log as Bjorn suggested.
> 
> If you'd like, you can add that this problem was also noticed when
> PCIe devices (thunderbolt docks) were hot removed from chromebooks,
> and then hot-plugged back again. Once hotplugged back, the newer
> Intel
> chromebooks fail to go into S0ix low power state because of this LTR
> issue, and this patch fixes that.
> 

Thanks for your comments :)
I have sent a new version of this patch with more details in commit
log.

> Bjorn: this was also proposed earlier (but the patch was never
> merged) here:
> https://patchwork.kernel.org/project/linux-pci/patch/20210114134724.7
> 9511-1-mika.westerberg@linux.intel.com/
> (It says "superceded", but I couldn't find the patch that superceded
> Mika's patch. Perhaps it is *this* patch?)
> 
> > 
> > For the suspend(D3 cold) and resume case, the LTR enable bit value
> > of bridge is saved(by pci_save_state()) in suspend flow and
> > restored(by
> > pci_restore_state()) in resume flow.
> >   -If link goes down after bridge already does pci_save_state()
> >     LTR could work after resume due to pci_restore_state() will
> > enable
> > the LTR of bridge.
> >   -If link goes down before bridge does pci_save_state()
> >     LTR probably doesn't work after resume due to the LTR bit is
> > already disable when pci_save_state() and will not enable after
> > pci_restore_sate().
> > 
> > The sequence of link goes down and brdige suspend maybe platform
> > specific.
> > 
> > The issue is noticed by AER log as following shows.
> > 
> > pcieport 0000:00:1d.0: AER: Uncorrected (Non-Fatal) error received:
> > id=00e8
> > pcieport 0000:00:1d.0: PCIe Bus Error: severity=Uncorrected (Non-
> > Fatal), type=Transaction Layer, id=00e8(Requester ID)
> > pcieport 0000:00:1d.0:   device [8086:9d18] error
> > status/mask=00100000/00010000
> > pcieport 0000:00:1d.0:    [20] Unsupported Request    (First)
> 
> Yes, this is expected, because an LTR message from a downstream
> device
> shall be treated as unsupported request if LTR is disabled at the
> rootport.
> 
> > pcieport 0000:00:1d.0:   TLP Header: 34000000 03000010 00000000
> > 00000000
> > 
> > > > > > Also, if a device with LTR enabled is hot-removed, and we
> > > > > > hot-
> > > > > > add a
> > > > > > device, I think LTR will not work on the new
> > > > > > device.  Possibly
> > > > > > also
> > > > > > a
> > > > > > bug, although I'm not convinced we know how to configure
> > > > > > LTR on
> > > > > > the
> > > > > > new device anyway.
> > > > > > 
> > > > > > So I'd *like* to merge the bug fix for v5.12, but I think
> > > > > > I'll
> > > > > > wait
> > > > > > because of the issue below.
> > > > > > 
> > > > > 
> > > > > A friendly ping.
> > > > > Any further process shall I make to get this patch merged?
> > > > > 
> > > > > > > Signed-off-by: Mingchuang Qiao <mingchuang.qiao@mediatek.
> > > > > > > com>
> > > > > > > ---
> > > > > > > changes of v4
> > > > > > >  -fix typo of commit message
> > > > > > >  -rename: pci_reconfigure_bridge_ltr()-
> > > > > > > > pci_bridge_reconfigure_ltr()
> > > > > > > 
> > > > > > > changes of v3
> > > > > > >  -call pci_reconfigure_bridge_ltr() in probe.c
> > > > > > > changes of v2
> > > > > > >  -modify patch description
> > > > > > >  -reconfigure bridge's LTR before restoring device
> > > > > > > DEVCTL2
> > > > > > > register
> > > > > > > ---
> > > > > > >  drivers/pci/pci.c   | 25 +++++++++++++++++++++++++
> > > > > > >  drivers/pci/pci.h   |  1 +
> > > > > > >  drivers/pci/probe.c | 13 ++++++++++---
> > > > > > >  3 files changed, 36 insertions(+), 3 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > > > > > > index b9fecc25d213..6bf65d295331 100644
> > > > > > > --- a/drivers/pci/pci.c
> > > > > > > +++ b/drivers/pci/pci.c
> > > > > > > @@ -1437,6 +1437,24 @@ static int
> > > > > > > pci_save_pcie_state(struct
> > > > > > > pci_dev *dev)
> > > > > > >       return 0;
> > > > > > >  }
> > > > > > > 
> > > > > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
> > > > > > > +{
> > > > > > > +#ifdef CONFIG_PCIEASPM
> > > > > > > +     struct pci_dev *bridge;
> > > > > > > +     u32 ctl;
> > > > > > > +
> > > > > > > +     bridge = pci_upstream_bridge(dev);
> > > > > > > +     if (bridge && bridge->ltr_path) {
> > > > > > > +             pcie_capability_read_dword(bridge,
> > > > > > > PCI_EXP_DEVCTL2, &ctl);
> > > > > > > +             if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
> > > > > > > +                     pci_dbg(bridge, "re-enabling
> > > > > > > LTR\n");
> > > > > > > +                     pcie_capability_set_word(bridge,
> > > > > > > PCI_EXP_DEVCTL2,
> > > > > > > +                                              PCI_EXP_DE
> > > > > > > V
> > > > > > > CTL2
> > > > > > > _L
> > > > > > > TR_EN);
> > > > > > 
> > > > > > This pattern of updating the upstream bridge on behalf of
> > > > > > "dev"
> > > > > > is
> > > > > > problematic because it's racy:
> > > > > > 
> > > > > >   CPU 1                     CPU 2
> > > > > >   -------------------       ---------------------
> > > > > >   ctl = read DEVCTL2        ctl = read(DEVCTL2)
> > > > > >   ctl |= DEVCTL2_LTR_EN     ctl |= DEVCTL2_ARI
> > > > > >   write(DEVCTL2, ctl)
> > > > > >                             write(DEVCTL2, ctl)
> > > > > > 
> > > > > > Now the bridge has ARI set, but not LTR_EN.
> > > > > > 
> > > > > > We have the same problem in the pci_enable_device()
> > > > > > path.  The
> > > > > > most
> > > > > > recent try at fixing it is [1].
> > > 
> > > I was hoping you would respond with "yes, I understand the
> > > problem,
> > > but don't think it's likely" or "no, this isn't actually a
> > > problem
> > > because ..."
> > > 
> > > I think it *is* a problem, but we're probably unlikely to hit it,
> > > so
> > > we can probably live with it for now.
> > > 
> > 
> > Yes, I understand the problem. I also think it unlikely to hit and
> > we
> > can probably live with it for now.
> > Thanks.
> 
> Given that LTR applies to only PCI Express devices, and 2 of such
> devices cannot be simultaneously hot-added under the same parent, I
> think it is highly unlikely to hit.
> I agree that it is a problem in general though. But It doesn't look
> like we are not any close to a merging the other patch series Bjorn
> pointed out (https://lore.kernel.org/linux-pci/20201218174011.340514-
> 2-s.miroshnichenko@yadro.com/).
> 
> So perhaps we could merge this patch, and while this patch may not be
> ideal, it helps in fixing the current set of issues seen with hotplug
> of thunderbolt devices (which are very noticable on Intel chromebooks
> atleast since it prevents them from going into S0ix)?
> 
> Thanks,
> 
> Rajat
> 
> 
> 
> > 
> > > > > > [1] https://lore.kernel.org/linux-pci/20201218174011.340514
> > > > > > -2-
> > > > > > s.mir
> > > > > > os
> > > > > > hnichenko@yadro.com/
> > > > > > 
> > > > > > > +             }
> > > > > > > +     }
> > > > > > > +#endif
> > > > > > > +}
> > > > > > > +
> > > > > > >  static void pci_restore_pcie_state(struct pci_dev *dev)
> > > > > > >  {
> > > > > > >       int i = 0;
> > > > > > > @@ -1447,6 +1465,13 @@ static void
> > > > > > > pci_restore_pcie_state(struct
> > > > > > > pci_dev *dev)
> > > > > > >       if (!save_state)
> > > > > > >               return;
> > > > > > > 
> > > > > > > +     /*
> > > > > > > +      * Downstream ports reset the LTR enable bit when
> > > > > > > link
> > > > > > > goes down.
> > > > > > > +      * Check and re-configure the bit here before
> > > > > > > restoring
> > > > > > > device.
> > > > > > > +      * PCIe r5.0, sec 7.5.3.16.
> > > > > > > +      */
> > > > > > > +     pci_bridge_reconfigure_ltr(dev);
> > > > > > > +
> > > > > > >       cap = (u16 *)&save_state->cap.data[0];
> > > > > > >       pcie_capability_write_word(dev, PCI_EXP_DEVCTL,
> > > > > > > cap[i++]);
> > > > > > >       pcie_capability_write_word(dev, PCI_EXP_LNKCTL,
> > > > > > > cap[i++]);
> > > > > > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > > > > > index 5c59365092fa..b3a5e5287cb7 100644
> > > > > > > --- a/drivers/pci/pci.h
> > > > > > > +++ b/drivers/pci/pci.h
> > > > > > > @@ -111,6 +111,7 @@ void pci_free_cap_save_buffers(struct
> > > > > > > pci_dev
> > > > > > > *dev);
> > > > > > >  bool pci_bridge_d3_possible(struct pci_dev *dev);
> > > > > > >  void pci_bridge_d3_update(struct pci_dev *dev);
> > > > > > >  void pci_bridge_wait_for_secondary_bus(struct pci_dev
> > > > > > > *dev);
> > > > > > > +void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
> > > > > > > 
> > > > > > >  static inline void pci_wakeup_event(struct pci_dev *dev)
> > > > > > >  {
> > > > > > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > > > > > index 953f15abc850..ade055e9fb58 100644
> > > > > > > --- a/drivers/pci/probe.c
> > > > > > > +++ b/drivers/pci/probe.c
> > > > > > > @@ -2132,9 +2132,16 @@ static void
> > > > > > > pci_configure_ltr(struct
> > > > > > > pci_dev
> > > > > > > *dev)
> > > > > > >        * Complex and all intermediate Switches indicate
> > > > > > > support
> > > > > > > for LTR.
> > > > > > >        * PCIe r4.0, sec 6.18.
> > > > > > >        */
> > > > > > > -     if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> > > > > > > -         ((bridge = pci_upstream_bridge(dev)) &&
> > > > > > > -           bridge->ltr_path)) {
> > > > > > > +     if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> > > > > > > +             pcie_capability_set_word(dev,
> > > > > > > PCI_EXP_DEVCTL2,
> > > > > > > +                                      PCI_EXP_DEVCTL2_LT
> > > > > > > R
> > > > > > > _EN)
> > > > > > > ;
> > > > > > > +             dev->ltr_path = 1;
> > > > > > > +             return;
> > > > > > > +     }
> > > > > > > +
> > > > > > > +     bridge = pci_upstream_bridge(dev);
> > > > > > > +     if (bridge && bridge->ltr_path) {
> > > > > > > +             pci_bridge_reconfigure_ltr(dev);
> > > > > > >               pcie_capability_set_word(dev,
> > > > > > > PCI_EXP_DEVCTL2,
> > > > > > >                                        PCI_EXP_DEVCTL2_LT
> > > > > > > R
> > > > > > > _EN)
> > > > > > > ;
> > > > > > >               dev->ltr_path = 1;
> > > > > > > --
> > > > > > > 2.18.0
> > > > > > 
> > > > > > _______________________________________________
> > > > > > Linux-mediatek mailing list
> > > > > > Linux-mediatek@lists.infradead.org
> > > > > > http://lists.infradead.org/mailman/listinfo/linux-mediatek

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-10-12  8:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-04  9:51 [v4] PCI: Avoid unsync of LTR mechanism configuration mingchuang.qiao
2021-02-04 10:00 ` Mika Westerberg
2021-02-18 16:50 ` Bjorn Helgaas
2021-03-22  0:41   ` Mingchuang Qiao
2021-09-06  5:36   ` mingchuang qiao
2021-09-30  7:02     ` mingchuang qiao
2021-09-30 19:48       ` Bjorn Helgaas
2021-10-08  6:30         ` mingchuang qiao
2021-10-12  2:48           ` Rajat Jain
2021-10-12  8:01             ` Mingchuang Qiao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).